xref: /openbmc/qemu/qemu-img.c (revision c1563952521d4d5947b4279efe8dcc432791287e)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu/help-texts.h"
29 #include "qemu/qemu-progress.h"
30 #include "qemu-version.h"
31 #include "qapi/error.h"
32 #include "qapi/qapi-commands-block-core.h"
33 #include "qapi/qapi-visit-block-core.h"
34 #include "qapi/qobject-output-visitor.h"
35 #include "qobject/qjson.h"
36 #include "qobject/qdict.h"
37 #include "qemu/cutils.h"
38 #include "qemu/config-file.h"
39 #include "qemu/option.h"
40 #include "qemu/error-report.h"
41 #include "qemu/log.h"
42 #include "qemu/main-loop.h"
43 #include "qemu/module.h"
44 #include "qemu/sockets.h"
45 #include "qemu/units.h"
46 #include "qemu/memalign.h"
47 #include "qom/object_interfaces.h"
48 #include "system/block-backend.h"
49 #include "block/block_int.h"
50 #include "block/blockjob.h"
51 #include "block/dirty-bitmap.h"
52 #include "block/qapi.h"
53 #include "crypto/init.h"
54 #include "trace/control.h"
55 #include "qemu/throttle.h"
56 #include "block/throttle-groups.h"
57 
/* Version banner: version line followed by the copyright line. */
#define QEMU_IMG_VERSION \
    "qemu-img version " QEMU_FULL_VERSION "\n" QEMU_COPYRIGHT "\n"
60 
/*
 * One qemu-img subcommand: its name and the function implementing it.
 * The handler receives its own table entry together with the argument
 * vector and returns an int status.
 */
typedef struct img_cmd_t {
    const char *name;       /* subcommand name */
    int (*handler)(const struct img_cmd_t *cmd, int argc, char **argv);
} img_cmd_t;
65 
/*
 * getopt_long() return values for long-only options.  Numbering starts
 * at 256 so the values cannot collide with any single-character option;
 * each subsequent enumerator is one more than the previous one.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN,       /* 257 */
    OPTION_OBJECT,              /* 258 */
    OPTION_IMAGE_OPTS,          /* 259 */
    OPTION_PATTERN,             /* 260 */
    OPTION_FLUSH_INTERVAL,      /* 261 */
    OPTION_NO_DRAIN,            /* 262 */
    OPTION_TARGET_IMAGE_OPTS,   /* 263 */
    OPTION_SIZE,                /* 264 */
    OPTION_PREALLOCATION,       /* 265 */
    OPTION_SHRINK,              /* 266 */
    OPTION_SALVAGE,             /* 267 */
    OPTION_TARGET_IS_ZERO,      /* 268 */
    OPTION_ADD,                 /* 269 */
    OPTION_REMOVE,              /* 270 */
    OPTION_CLEAR,               /* 271 */
    OPTION_ENABLE,              /* 272 */
    OPTION_DISABLE,             /* 273 */
    OPTION_MERGE,               /* 274 */
    OPTION_BITMAPS,             /* 275 */
    OPTION_FORCE,               /* 276 */
    OPTION_SKIP_BROKEN,         /* 277 */
};
90 
/* Output styles selectable with --output (see parse_output_format()). */
typedef enum OutputFormat {
    OFORMAT_JSON  = 0,  /* "json" */
    OFORMAT_HUMAN = 1,  /* "human" */
} OutputFormat;
95 
/*
 * qemu-img does not need strong data-integrity guarantees, so the
 * default cache mode is cache=writeback.
 */
#define BDRV_DEFAULT_CACHE "writeback"
98 
/*
 * Callback handed to bdrv_iterate_format() by help(): print one format
 * name, preceded by a space, on stdout.  @opaque is not used.
 */
static void format_print(void *opaque, const char *name)
{
    (void)opaque;
    printf(" %s", name);
}
103 
104 static G_NORETURN
105 void tryhelp(const char *argv0)
106 {
107     error_printf("Try '%s --help' for more information\n", argv0);
108     exit(EXIT_FAILURE);
109 }
110 
111 static G_NORETURN G_GNUC_PRINTF(2, 3)
112 void error_exit(const char *argv0, const char *fmt, ...)
113 {
114     va_list ap;
115 
116     va_start(ap, fmt);
117     error_vreport(fmt, ap);
118     va_end(ap);
119 
120     tryhelp(argv0);
121 }
122 
123 static G_NORETURN
124 void missing_argument(const char *option)
125 {
126     error_exit("qemu-img", "missing argument for option '%s'", option);
127 }
128 
129 static G_NORETURN
130 void unrecognized_option(const char *option)
131 {
132     error_exit("qemu-img", "unrecognized option '%s'", option);
133 }
134 
135 /*
136  * Print --help output for a command and exit.
137  * @syntax and @description are multi-line with trailing EOL
138  * (to allow easy extending of the text)
139  * @syntax has each subsequent line indented by 8 chars.
140  * @description is indented by 2 chars for argument on each own line,
141  * and with 5 chars for argument description (like -h arg below).
142  */
143 static G_NORETURN
144 void cmd_help(const img_cmd_t *ccmd,
145               const char *syntax, const char *arguments)
146 {
147     printf(
148 "Usage:\n"
149 "\n"
150 "  %s %s %s"
151 "\n"
152 "Arguments:\n"
153 "  -h, --help\n"
154 "     print this help and exit\n"
155 "%s\n",
156            "qemu-img", ccmd->name,
157            syntax, arguments);
158     exit(EXIT_SUCCESS);
159 }
160 
161 static OutputFormat parse_output_format(const char *argv0, const char *arg)
162 {
163     if (!strcmp(arg, "json")) {
164         return OFORMAT_JSON;
165     } else if (!strcmp(arg, "human")) {
166         return OFORMAT_HUMAN;
167     } else {
168         error_exit(argv0, "--output expects 'human' or 'json', not '%s'", arg);
169     }
170 }
171 
172 /* Please keep in synch with docs/tools/qemu-img.rst */
173 static G_NORETURN
174 void help(void)
175 {
176     const char *help_msg =
177            QEMU_IMG_VERSION
178            "usage: qemu-img [standard options] command [command options]\n"
179            "QEMU disk image utility\n"
180            "\n"
181            "    '-h', '--help'       display this help and exit\n"
182            "    '-V', '--version'    output version information and exit\n"
183            "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
184            "                         specify tracing options\n"
185            "\n"
186            "Command syntax:\n"
187 #define DEF(option, callback, arg_string)        \
188            "  " arg_string "\n"
189 #include "qemu-img-cmds.h"
190 #undef DEF
191            "\n"
192            "Command parameters:\n"
193            "  'filename' is a disk image filename\n"
194            "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
195            "    manual page for a description of the object properties. The most common\n"
196            "    object type is a 'secret', which is used to supply passwords and/or\n"
197            "    encryption keys.\n"
198            "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
199            "  'cache' is the cache mode used to write the output disk image, the valid\n"
200            "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
201            "    'directsync' and 'unsafe' (default for convert)\n"
202            "  'src_cache' is the cache mode used to read input disk images, the valid\n"
203            "    options are the same as for the 'cache' option\n"
204            "  'size' is the disk image size in bytes. Optional suffixes\n"
205            "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
206            "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
207            "    supported. 'b' is ignored.\n"
208            "  'output_filename' is the destination disk image filename\n"
209            "  'output_fmt' is the destination format\n"
210            "  'options' is a comma separated list of format specific options in a\n"
211            "    name=value format. Use -o help for an overview of the options supported by\n"
212            "    the used format\n"
213            "  'snapshot_param' is param used for internal snapshot, format\n"
214            "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
215            "    '[ID_OR_NAME]'\n"
216            "  '-c' indicates that target image must be compressed (qcow format only)\n"
217            "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
218            "       new backing file match exactly. The image doesn't need a working\n"
219            "       backing file before rebasing in this case (useful for renaming the\n"
220            "       backing file). For image creation, allow creating without attempting\n"
221            "       to open the backing file.\n"
222            "  '-h' with or without a command shows this help and lists the supported formats\n"
223            "  '-p' show progress of command (only certain commands)\n"
224            "  '-q' use Quiet mode - do not print any output (except errors)\n"
225            "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
226            "       contain only zeros for qemu-img to create a sparse image during\n"
227            "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
228            "       unallocated or zero sectors, and the destination image will always be\n"
229            "       fully allocated\n"
230            "  '--output' takes the format in which the output must be done (human or json)\n"
231            "  '-n' skips the target volume creation (useful if the volume is created\n"
232            "       prior to running qemu-img)\n"
233            "\n"
234            "Parameters to bitmap subcommand:\n"
235            "  'bitmap' is the name of the bitmap to manipulate, through one or more\n"
236            "       actions from '--add', '--remove', '--clear', '--enable', '--disable',\n"
237            "       or '--merge source'\n"
238            "  '-g granularity' sets the granularity for '--add' actions\n"
239            "  '-b source' and '-F src_fmt' tell '--merge' actions to find the source\n"
240            "       bitmaps from an alternative file\n"
241            "\n"
242            "Parameters to check subcommand:\n"
243            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
244            "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
245            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
246            "       hiding corruption that has already occurred.\n"
247            "\n"
248            "Parameters to convert subcommand:\n"
249            "  '--bitmaps' copies all top-level persistent bitmaps to destination\n"
250            "  '-m' specifies how many coroutines work in parallel during the convert\n"
251            "       process (defaults to 8)\n"
252            "  '-W' allow to write to the target out of order rather than sequential\n"
253            "\n"
254            "Parameters to snapshot subcommand:\n"
255            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
256            "  '-a' applies a snapshot (revert disk to saved state)\n"
257            "  '-c' creates a snapshot\n"
258            "  '-d' deletes a snapshot\n"
259            "  '-l' lists all snapshots in the given image\n"
260            "\n"
261            "Parameters to compare subcommand:\n"
262            "  '-f' first image format\n"
263            "  '-F' second image format\n"
264            "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
265            "\n"
266            "Parameters to dd subcommand:\n"
267            "  'bs=BYTES' read and write up to BYTES bytes at a time "
268            "(default: 512)\n"
269            "  'count=N' copy only N input blocks\n"
270            "  'if=FILE' read from FILE\n"
271            "  'of=FILE' write to FILE\n"
272            "  'skip=N' skip N bs-sized blocks at the start of input\n";
273 
274     printf("%s\nSupported formats:", help_msg);
275     bdrv_iterate_format(format_print, NULL, false);
276     printf("\n\n" QEMU_HELP_BOTTOM "\n");
277     exit(EXIT_SUCCESS);
278 }
279 
/*
 * Decide whether @list may be passed to accumulate_options(), i.e.
 * whether several such lists can be joined with ',' separators without
 * any separator being swallowed by comma escaping.
 *
 * A list is rejected when it
 *  - is empty (a separator before it could escape the one after it),
 *  - starts with ',' (the separator before it would get escaped), or
 *  - ends with an odd number of ',' (the separator after it would get
 *    escaped).
 *
 * Returns true if @list is safe, false otherwise.
 */
static bool is_valid_option_list(const char *list)
{
    size_t len = strlen(list);
    size_t trailing_commas = 0;

    if (len == 0 || list[0] == ',') {
        return false;
    }

    /*
     * list[0] is not ',' here, so the scan always stops before running
     * off the front of the string.
     */
    while (trailing_commas < len && list[len - 1 - trailing_commas] == ',') {
        trailing_commas++;
    }

    return trailing_commas % 2 == 0;
}
307 
/*
 * Append option list @list to the heap-allocated string *@options,
 * joining with ','.  *@options may be NULL, in which case it becomes a
 * copy of @list.  The previous string, if any, is freed and replaced.
 *
 * Returns 0 on success.  If @list fails is_valid_option_list(), an
 * error is reported, *@options is left untouched and -1 is returned.
 */
static int accumulate_options(char **options, char *list)
{
    char *joined;

    if (!is_valid_option_list(list)) {
        error_report("Invalid option list: %s", list);
        return -1;
    }

    joined = *options ? g_strdup_printf("%s,%s", *options, list)
                      : g_strdup(list);
    g_free(*options);   /* no-op when there was no previous string */
    *options = joined;

    return 0;
}
326 
327 static QemuOptsList qemu_source_opts = {
328     .name = "source",
329     .implied_opt_name = "file",
330     .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
331     .desc = {
332         { }
333     },
334 };
335 
336 static int G_GNUC_PRINTF(2, 3) qprintf(bool quiet, const char *fmt, ...)
337 {
338     int ret = 0;
339     if (!quiet) {
340         va_list args;
341         va_start(args, fmt);
342         ret = vprintf(fmt, args);
343         va_end(args);
344     }
345     return ret;
346 }
347 
348 
349 static int print_block_option_help(const char *filename, const char *fmt)
350 {
351     BlockDriver *drv, *proto_drv;
352     QemuOptsList *create_opts = NULL;
353     Error *local_err = NULL;
354 
355     /* Find driver and parse its options */
356     drv = bdrv_find_format(fmt);
357     if (!drv) {
358         error_report("Unknown file format '%s'", fmt);
359         return 1;
360     }
361 
362     if (!drv->create_opts) {
363         error_report("Format driver '%s' does not support image creation", fmt);
364         return 1;
365     }
366 
367     create_opts = qemu_opts_append(create_opts, drv->create_opts);
368     if (filename) {
369         proto_drv = bdrv_find_protocol(filename, true, &local_err);
370         if (!proto_drv) {
371             error_report_err(local_err);
372             qemu_opts_free(create_opts);
373             return 1;
374         }
375         if (!proto_drv->create_opts) {
376             error_report("Protocol driver '%s' does not support image creation",
377                          proto_drv->format_name);
378             qemu_opts_free(create_opts);
379             return 1;
380         }
381         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
382     }
383 
384     if (filename) {
385         printf("Supported options:\n");
386     } else {
387         printf("Supported %s options:\n", fmt);
388     }
389     qemu_opts_print_help(create_opts, false);
390     qemu_opts_free(create_opts);
391 
392     if (!filename) {
393         printf("\n"
394                "The protocol level may support further options.\n"
395                "Specify the target filename to include those options.\n");
396     }
397 
398     return 0;
399 }
400 
401 
402 static BlockBackend *img_open_opts(const char *optstr,
403                                    QemuOpts *opts, int flags, bool writethrough,
404                                    bool quiet, bool force_share)
405 {
406     QDict *options;
407     Error *local_err = NULL;
408     BlockBackend *blk;
409     options = qemu_opts_to_qdict(opts, NULL);
410     if (force_share) {
411         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
412             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
413             error_report("--force-share/-U conflicts with image options");
414             qobject_unref(options);
415             return NULL;
416         }
417         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
418     }
419     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
420     if (!blk) {
421         error_reportf_err(local_err, "Could not open '%s': ", optstr);
422         return NULL;
423     }
424     blk_set_enable_write_cache(blk, !writethrough);
425 
426     return blk;
427 }
428 
429 static BlockBackend *img_open_file(const char *filename,
430                                    QDict *options,
431                                    const char *fmt, int flags,
432                                    bool writethrough, bool quiet,
433                                    bool force_share)
434 {
435     BlockBackend *blk;
436     Error *local_err = NULL;
437 
438     if (!options) {
439         options = qdict_new();
440     }
441     if (fmt) {
442         qdict_put_str(options, "driver", fmt);
443     }
444 
445     if (force_share) {
446         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
447     }
448     blk = blk_new_open(filename, NULL, options, flags, &local_err);
449     if (!blk) {
450         error_reportf_err(local_err, "Could not open '%s': ", filename);
451         return NULL;
452     }
453     blk_set_enable_write_cache(blk, !writethrough);
454 
455     return blk;
456 }
457 
458 
459 static int img_add_key_secrets(void *opaque,
460                                const char *name, const char *value,
461                                Error **errp)
462 {
463     QDict *options = opaque;
464 
465     if (g_str_has_suffix(name, "key-secret")) {
466         qdict_put_str(options, name, value);
467     }
468 
469     return 0;
470 }
471 
472 
473 static BlockBackend *img_open(bool image_opts,
474                               const char *filename,
475                               const char *fmt, int flags, bool writethrough,
476                               bool quiet, bool force_share)
477 {
478     BlockBackend *blk;
479     if (image_opts) {
480         QemuOpts *opts;
481         if (fmt) {
482             error_report("--image-opts and --format are mutually exclusive");
483             return NULL;
484         }
485         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
486                                        filename, true);
487         if (!opts) {
488             return NULL;
489         }
490         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
491                             force_share);
492     } else {
493         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
494                             force_share);
495     }
496 
497     if (blk) {
498         blk_set_force_allow_inactivate(blk);
499     }
500 
501     return blk;
502 }
503 
504 
505 static int add_old_style_options(const char *fmt, QemuOpts *opts,
506                                  const char *base_filename,
507                                  const char *base_fmt)
508 {
509     if (base_filename) {
510         if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
511                           NULL)) {
512             error_report("Backing file not supported for file format '%s'",
513                          fmt);
514             return -1;
515         }
516     }
517     if (base_fmt) {
518         if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
519             error_report("Backing file format not supported for file "
520                          "format '%s'", fmt);
521             return -1;
522         }
523     }
524     return 0;
525 }
526 
/*
 * Parse the size string @value with qemu_strtosz() and check that the
 * result lies within [@min, @max].
 *
 * @name describes the value in error messages.
 *
 * Returns the parsed size on success.  On failure an error is reported
 * and a negative value is returned: the qemu_strtosz() error code for a
 * malformed string, or -ERANGE when the value overflows or is outside
 * the requested range.
 */
static int64_t cvtnum_full(const char *name, const char *value, int64_t min,
                           int64_t max)
{
    int err;
    uint64_t res;

    err = qemu_strtosz(value, NULL, &res);
    if (err < 0 && err != -ERANGE) {
        error_report("Invalid %s specified. You may use "
                     "k, M, G, T, P or E suffixes for", name);
        error_report("kilobytes, megabytes, gigabytes, terabytes, "
                     "petabytes and exabytes.");
        return err;
    }

    /*
     * @res is unsigned while the bounds are signed.  Comparing them
     * directly would convert a negative bound to a huge unsigned value,
     * so handle the sign explicitly: nothing unsigned can be below a
     * negative @min, and everything is above a negative @max.
     * @res is only inspected when qemu_strtosz() did not report -ERANGE.
     */
    if (err == -ERANGE ||
        max < 0 || res > (uint64_t)max ||
        (min > 0 && res < (uint64_t)min)) {
        error_report("Invalid %s specified. Must be between %" PRId64
                     " and %" PRId64 ".", name, min, max);
        return -ERANGE;
    }

    /* res <= max <= INT64_MAX here, so the conversion is lossless. */
    return (int64_t)res;
}
548 
/*
 * Parse a non-negative size: cvtnum_full() with the range
 * [0, INT64_MAX].  Returns the size, or a negative value on error.
 */
static int64_t cvtnum(const char *name, const char *value)
{
    const int64_t lowest = 0;
    const int64_t highest = INT64_MAX;

    return cvtnum_full(name, value, lowest, highest);
}
553 
554 static int img_create(const img_cmd_t *ccmd, int argc, char **argv)
555 {
556     int c;
557     int64_t img_size = -1;
558     const char *fmt = "raw";
559     const char *base_fmt = NULL;
560     const char *filename;
561     const char *base_filename = NULL;
562     char *options = NULL;
563     Error *local_err = NULL;
564     bool quiet = false;
565     int flags = 0;
566 
567     for(;;) {
568         static const struct option long_options[] = {
569             {"help", no_argument, 0, 'h'},
570             {"format", required_argument, 0, 'f'},
571             {"options", required_argument, 0, 'o'},
572             {"backing", required_argument, 0, 'b'},
573             {"backing-format", required_argument, 0, 'B'}, /* was -F in 10.0 */
574             {"backing-unsafe", no_argument, 0, 'u'},
575             {"quiet", no_argument, 0, 'q'},
576             {"object", required_argument, 0, OPTION_OBJECT},
577             {0, 0, 0, 0}
578         };
579         c = getopt_long(argc, argv, "hf:o:b:F:B:uq",
580                         long_options, NULL);
581         if (c == -1) {
582             break;
583         }
584         switch(c) {
585         case 'h':
586             cmd_help(ccmd, "[-f FMT] [-o FMT_OPTS]\n"
587 "        [-b BACKING_FILE [-B BACKING_FMT]] [-u]\n"
588 "        [-q] [--object OBJDEF] FILE [SIZE]\n"
589 ,
590 "  -f, --format FMT\n"
591 "     specifies the format of the new image (default: raw)\n"
592 "  -o, --options FMT_OPTS\n"
593 "     format-specific options (specify '-o help' for help)\n"
594 "  -b, --backing BACKING_FILE\n"
595 "     create target image to be a CoW on top of BACKING_FILE\n"
596 "  -B, --backing-format BACKING_FMT (was -F in <= 10.0)\n"
597 "     specifies the format of BACKING_FILE (default: probing is used)\n"
598 "  -u, --backing-unsafe\n"
599 "     do not fail if BACKING_FILE can not be read\n"
600 "  -q, --quiet\n"
601 "     quiet mode (produce only error messages if any)\n"
602 "  --object OBJDEF\n"
603 "     defines QEMU user-creatable object\n"
604 "  FILE\n"
605 "     name of the image file to create (will be overritten if already exists)\n"
606 "  SIZE[bKMGTPE]\n"
607 "     image size with optional multiplier suffix (powers of 1024)\n"
608 "     (required unless BACKING_FILE is specified)\n"
609 );
610             break;
611         case 'f':
612             fmt = optarg;
613             break;
614         case 'o':
615             if (accumulate_options(&options, optarg) < 0) {
616                 goto fail;
617             }
618             break;
619         case 'b':
620             base_filename = optarg;
621             break;
622         case 'F': /* <=10.0 */
623         case 'B':
624             base_fmt = optarg;
625             break;
626         case 'u':
627             flags |= BDRV_O_NO_BACKING;
628             break;
629         case 'q':
630             quiet = true;
631             break;
632         case OPTION_OBJECT:
633             user_creatable_process_cmdline(optarg);
634             break;
635         default:
636             tryhelp(argv[0]);
637         }
638     }
639 
640     /* Get the filename */
641     filename = (optind < argc) ? argv[optind] : NULL;
642     if (options && has_help_option(options)) {
643         g_free(options);
644         return print_block_option_help(filename, fmt);
645     }
646 
647     if (optind >= argc) {
648         error_exit(argv[0], "Expecting image file name");
649     }
650     optind++;
651 
652     /* Get image size, if specified */
653     if (optind < argc) {
654         img_size = cvtnum("image size", argv[optind++]);
655         if (img_size < 0) {
656             goto fail;
657         }
658     }
659     if (optind != argc) {
660         error_exit(argv[0], "Unexpected argument: %s", argv[optind]);
661     }
662 
663     bdrv_img_create(filename, fmt, base_filename, base_fmt,
664                     options, img_size, flags, quiet, &local_err);
665     if (local_err) {
666         error_reportf_err(local_err, "%s: ", filename);
667         goto fail;
668     }
669 
670     g_free(options);
671     return 0;
672 
673 fail:
674     g_free(options);
675     return 1;
676 }
677 
678 static void dump_json_image_check(ImageCheck *check, bool quiet)
679 {
680     GString *str;
681     QObject *obj;
682     Visitor *v = qobject_output_visitor_new(&obj);
683 
684     visit_type_ImageCheck(v, NULL, &check, &error_abort);
685     visit_complete(v, &obj);
686     str = qobject_to_json_pretty(obj, true);
687     assert(str != NULL);
688     qprintf(quiet, "%s\n", str->str);
689     qobject_unref(obj);
690     visit_free(v);
691     g_string_free(str, true);
692 }
693 
694 static void dump_human_image_check(ImageCheck *check, bool quiet)
695 {
696     if (!(check->corruptions || check->leaks || check->check_errors)) {
697         qprintf(quiet, "No errors were found on the image.\n");
698     } else {
699         if (check->corruptions) {
700             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
701                     "Data may be corrupted, or further writes to the image "
702                     "may corrupt it.\n",
703                     check->corruptions);
704         }
705 
706         if (check->leaks) {
707             qprintf(quiet,
708                     "\n%" PRId64 " leaked clusters were found on the image.\n"
709                     "This means waste of disk space, but no harm to data.\n",
710                     check->leaks);
711         }
712 
713         if (check->check_errors) {
714             qprintf(quiet,
715                     "\n%" PRId64
716                     " internal errors have occurred during the check.\n",
717                     check->check_errors);
718         }
719     }
720 
721     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
722         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
723                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
724                 check->allocated_clusters, check->total_clusters,
725                 check->allocated_clusters * 100.0 / check->total_clusters,
726                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
727                 check->compressed_clusters * 100.0 /
728                 check->allocated_clusters);
729     }
730 
731     if (check->image_end_offset) {
732         qprintf(quiet,
733                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
734     }
735 }
736 
737 static int collect_image_check(BlockDriverState *bs,
738                    ImageCheck *check,
739                    const char *filename,
740                    const char *fmt,
741                    int fix)
742 {
743     int ret;
744     BdrvCheckResult result;
745 
746     ret = bdrv_check(bs, &result, fix);
747     if (ret < 0) {
748         return ret;
749     }
750 
751     check->filename                 = g_strdup(filename);
752     check->format                   = g_strdup(bdrv_get_format_name(bs));
753     check->check_errors             = result.check_errors;
754     check->corruptions              = result.corruptions;
755     check->has_corruptions          = result.corruptions != 0;
756     check->leaks                    = result.leaks;
757     check->has_leaks                = result.leaks != 0;
758     check->corruptions_fixed        = result.corruptions_fixed;
759     check->has_corruptions_fixed    = result.corruptions_fixed != 0;
760     check->leaks_fixed              = result.leaks_fixed;
761     check->has_leaks_fixed          = result.leaks_fixed != 0;
762     check->image_end_offset         = result.image_end_offset;
763     check->has_image_end_offset     = result.image_end_offset != 0;
764     check->total_clusters           = result.bfi.total_clusters;
765     check->has_total_clusters       = result.bfi.total_clusters != 0;
766     check->allocated_clusters       = result.bfi.allocated_clusters;
767     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
768     check->fragmented_clusters      = result.bfi.fragmented_clusters;
769     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
770     check->compressed_clusters      = result.bfi.compressed_clusters;
771     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
772 
773     return 0;
774 }
775 
776 /*
777  * Checks an image for consistency. Exit codes:
778  *
779  *  0 - Check completed, image is good
780  *  1 - Check not completed because of internal errors
781  *  2 - Check completed, image is corrupted
782  *  3 - Check completed, image has leaked clusters, but is good otherwise
783  * 63 - Checks are not supported by the image format
784  */
785 static int img_check(const img_cmd_t *ccmd, int argc, char **argv)
786 {
787     int c, ret;
788     OutputFormat output_format = OFORMAT_HUMAN;
789     const char *filename, *fmt, *cache;
790     BlockBackend *blk;
791     BlockDriverState *bs;
792     int fix = 0;
793     int flags = BDRV_O_CHECK;
794     bool writethrough;
795     ImageCheck *check;
796     bool quiet = false;
797     bool image_opts = false;
798     bool force_share = false;
799 
800     fmt = NULL;
801     cache = BDRV_DEFAULT_CACHE;
802 
803     for(;;) {
804         int option_index = 0;
805         static const struct option long_options[] = {
806             {"help", no_argument, 0, 'h'},
807             {"format", required_argument, 0, 'f'},
808             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
809             {"cache", required_argument, 0, 'T'},
810             {"repair", required_argument, 0, 'r'},
811             {"force-share", no_argument, 0, 'U'},
812             {"output", required_argument, 0, OPTION_OUTPUT},
813             {"quiet", no_argument, 0, 'q'},
814             {"object", required_argument, 0, OPTION_OBJECT},
815             {0, 0, 0, 0}
816         };
817         c = getopt_long(argc, argv, "hf:T:r:Uq",
818                         long_options, &option_index);
819         if (c == -1) {
820             break;
821         }
822         switch(c) {
823         case 'h':
824             cmd_help(ccmd, "[-f FMT | --image-opts] [-T CACHE_MODE] [-r leaks|all]\n"
825 "        [-U] [--output human|json] [-q] [--object OBJDEF] FILE\n"
826 ,
827 "  -f, --format FMT\n"
828 "     specifies the format of the image explicitly (default: probing is used)\n"
829 "  --image-opts\n"
830 "     treat FILE as an option string (key=value,..), not a file name\n"
831 "     (incompatible with -f|--format)\n"
832 "  -T, --cache CACHE_MODE\n" /* why not -t ? */
833 "     cache mode (default: " BDRV_DEFAULT_CACHE ")\n"
834 "  -r, --repair leaks|all\n"
835 "     repair errors of the given category in the image (image will be\n"
836 "     opened in read-write mode, incompatible with -U|--force-share)\n"
837 "  -U, --force-share\n"
838 "     open image in shared mode for concurrent access\n"
839 "  --output human|json\n"
840 "     output format (default: human)\n"
841 "  -q, --quiet\n"
842 "     quiet mode (produce only error messages if any)\n"
843 "  --object OBJDEF\n"
844 "     defines QEMU user-creatable object\n"
845 "  FILE\n"
846 "     name of the image file, or an option string (key=value,..)\n"
847 "     with --image-opts, to operate on\n"
848 );
849             break;
850         case 'f':
851             fmt = optarg;
852             break;
853         case OPTION_IMAGE_OPTS:
854             image_opts = true;
855             break;
856         case 'T':
857             cache = optarg;
858             break;
859         case 'r':
860             flags |= BDRV_O_RDWR;
861 
862             if (!strcmp(optarg, "leaks")) {
863                 fix = BDRV_FIX_LEAKS;
864             } else if (!strcmp(optarg, "all")) {
865                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
866             } else {
867                 error_exit(argv[0],
868                            "--repair (-r) expects 'leaks' or 'all', not '%s'",
869                            optarg);
870             }
871             break;
872         case 'U':
873             force_share = true;
874             break;
875         case OPTION_OUTPUT:
876             output_format = parse_output_format(argv[0], optarg);
877             break;
878         case 'q':
879             quiet = true;
880             break;
881         case OPTION_OBJECT:
882             user_creatable_process_cmdline(optarg);
883             break;
884         default:
885             tryhelp(argv[0]);
886         }
887     }
888     if (optind != argc - 1) {
889         error_exit(argv[0], "Expecting one image file name");
890     }
891     filename = argv[optind++];
892 
893     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
894     if (ret < 0) {
895         error_report("Invalid source cache option: %s", cache);
896         return 1;
897     }
898 
899     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
900                    force_share);
901     if (!blk) {
902         return 1;
903     }
904     bs = blk_bs(blk);
905 
906     check = g_new0(ImageCheck, 1);
907     ret = collect_image_check(bs, check, filename, fmt, fix);
908 
909     if (ret == -ENOTSUP) {
910         error_report("This image format does not support checks");
911         ret = 63;
912         goto fail;
913     }
914 
915     if (check->corruptions_fixed || check->leaks_fixed) {
916         int corruptions_fixed, leaks_fixed;
917         bool has_leaks_fixed, has_corruptions_fixed;
918 
919         leaks_fixed         = check->leaks_fixed;
920         has_leaks_fixed     = check->has_leaks_fixed;
921         corruptions_fixed   = check->corruptions_fixed;
922         has_corruptions_fixed = check->has_corruptions_fixed;
923 
924         if (output_format == OFORMAT_HUMAN) {
925             qprintf(quiet,
926                     "The following inconsistencies were found and repaired:\n\n"
927                     "    %" PRId64 " leaked clusters\n"
928                     "    %" PRId64 " corruptions\n\n"
929                     "Double checking the fixed image now...\n",
930                     check->leaks_fixed,
931                     check->corruptions_fixed);
932         }
933 
934         qapi_free_ImageCheck(check);
935         check = g_new0(ImageCheck, 1);
936         ret = collect_image_check(bs, check, filename, fmt, 0);
937 
938         check->leaks_fixed          = leaks_fixed;
939         check->has_leaks_fixed      = has_leaks_fixed;
940         check->corruptions_fixed    = corruptions_fixed;
941         check->has_corruptions_fixed = has_corruptions_fixed;
942     }
943 
944     if (!ret) {
945         switch (output_format) {
946         case OFORMAT_HUMAN:
947             dump_human_image_check(check, quiet);
948             break;
949         case OFORMAT_JSON:
950             dump_json_image_check(check, quiet);
951             break;
952         }
953     }
954 
955     if (ret || check->check_errors) {
956         if (ret) {
957             error_report("Check failed: %s", strerror(-ret));
958         } else {
959             error_report("Check failed");
960         }
961         ret = 1;
962         goto fail;
963     }
964 
965     if (check->corruptions) {
966         ret = 2;
967     } else if (check->leaks) {
968         ret = 3;
969     } else {
970         ret = 0;
971     }
972 
973 fail:
974     qapi_free_ImageCheck(check);
975     blk_unref(blk);
976     return ret;
977 }
978 
/*
 * Opaque context handed to common_block_job_cb().
 *
 * @bs:   block node the job was started on (filled in by img_commit();
 *        common_block_job_cb() itself does not read it)
 * @errp: location where common_block_job_cb() stores an error when the
 *        job finishes with a negative return value
 */
979 typedef struct CommonBlockJobCBInfo {
980     BlockDriverState *bs;
981     Error **errp;
982 } CommonBlockJobCBInfo;
983 
984 static void common_block_job_cb(void *opaque, int ret)
985 {
986     CommonBlockJobCBInfo *cbi = opaque;
987 
988     if (ret < 0) {
989         error_setg_errno(cbi->errp, -ret, "Block job failed");
990     }
991 }
992 
993 static void run_block_job(BlockJob *job, Error **errp)
994 {
995     uint64_t progress_current, progress_total;
996     AioContext *aio_context = block_job_get_aio_context(job);
997     int ret = 0;
998 
999     job_lock();
1000     job_ref_locked(&job->job);
1001     do {
1002         float progress = 0.0f;
1003         job_unlock();
1004         aio_poll(aio_context, true);
1005 
1006         progress_get_snapshot(&job->job.progress, &progress_current,
1007                               &progress_total);
1008         if (progress_total) {
1009             progress = (float)progress_current / progress_total * 100.f;
1010         }
1011         qemu_progress_print(progress, 0);
1012         job_lock();
1013     } while (!job_is_ready_locked(&job->job) &&
1014              !job_is_completed_locked(&job->job));
1015 
1016     if (!job_is_completed_locked(&job->job)) {
1017         ret = job_complete_sync_locked(&job->job, errp);
1018     } else {
1019         ret = job->job.ret;
1020     }
1021     job_unref_locked(&job->job);
1022     job_unlock();
1023 
1024     /* publish completion progress only when success */
1025     if (!ret) {
1026         qemu_progress_print(100.f, 0);
1027     }
1028 }
1029 
1030 static int img_commit(const img_cmd_t *ccmd, int argc, char **argv)
1031 {
1032     int c, ret, flags;
1033     const char *filename, *fmt, *cache, *base;
1034     BlockBackend *blk;
1035     BlockDriverState *bs, *base_bs;
1036     BlockJob *job;
1037     bool progress = false, quiet = false, drop = false;
1038     bool writethrough;
1039     Error *local_err = NULL;
1040     CommonBlockJobCBInfo cbi;
1041     bool image_opts = false;
1042     int64_t rate_limit = 0;
1043 
1044     fmt = NULL;
1045     cache = BDRV_DEFAULT_CACHE;
1046     base = NULL;
1047     for(;;) {
1048         static const struct option long_options[] = {
1049             {"help", no_argument, 0, 'h'},
1050             {"format", required_argument, 0, 'f'},
1051             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1052             {"cache", required_argument, 0, 't'},
1053             {"drop", no_argument, 0, 'd'},
1054             {"base", required_argument, 0, 'b'},
1055             {"rate-limit", required_argument, 0, 'r'},
1056             {"progress", no_argument, 0, 'p'},
1057             {"quiet", no_argument, 0, 'q'},
1058             {"object", required_argument, 0, OPTION_OBJECT},
1059             {0, 0, 0, 0}
1060         };
1061         c = getopt_long(argc, argv, "hf:t:db:r:pq",
1062                         long_options, NULL);
1063         if (c == -1) {
1064             break;
1065         }
1066         switch(c) {
1067         case 'h':
1068             cmd_help(ccmd, "[-f FMT | --image-opts] [-t CACHE_MODE] [-b BASE_IMG]\n"
1069 "        [-d] [-r RATE] [-q] [--object OBJDEF] FILE\n"
1070 ,
1071 "  -f, --format FMT\n"
1072 "     specify FILE image format explicitly (default: probing is used)\n"
1073 "  --image-opts\n"
1074 "     treat FILE as an option string (key=value,..), not a file name\n"
1075 "     (incompatible with -f|--format)\n"
1076 "  -t, --cache CACHE_MODE image cache mode (default: " BDRV_DEFAULT_CACHE ")\n"
1077 "  -d, --drop\n"
1078 "     skip emptying FILE on completion\n"
1079 "  -b, --base BASE_IMG\n"
1080 "     image in the backing chain to commit change to\n"
1081 "     (default: immediate backing file; implies --drop)\n"
1082 "  -r, --rate-limit RATE\n"
1083 "     I/O rate limit, in bytes per second\n"
1084 "  -p, --progress\n"
1085 "     display progress information\n"
1086 "  -q, --quiet\n"
1087 "     quiet mode (produce only error messages if any)\n"
1088 "  --object OBJDEF\n"
1089 "     defines QEMU user-creatable object\n"
1090 "  FILE\n"
1091 "     name of the image file, or an option string (key=value,..)\n"
1092 "     with --image-opts, to operate on\n"
1093 );
1094             break;
1095         case 'f':
1096             fmt = optarg;
1097             break;
1098         case OPTION_IMAGE_OPTS:
1099             image_opts = true;
1100             break;
1101         case 't':
1102             cache = optarg;
1103             break;
1104         case 'd':
1105             drop = true;
1106             break;
1107         case 'b':
1108             base = optarg;
1109             /* -b implies -d */
1110             drop = true;
1111             break;
1112         case 'r':
1113             rate_limit = cvtnum("rate limit", optarg);
1114             if (rate_limit < 0) {
1115                 return 1;
1116             }
1117             break;
1118         case 'p':
1119             progress = true;
1120             break;
1121         case 'q':
1122             quiet = true;
1123             break;
1124         case OPTION_OBJECT:
1125             user_creatable_process_cmdline(optarg);
1126             break;
1127         default:
1128             tryhelp(argv[0]);
1129         }
1130     }
1131 
1132     /* Progress is not shown in Quiet mode */
1133     if (quiet) {
1134         progress = false;
1135     }
1136 
1137     if (optind != argc - 1) {
1138         error_exit(argv[0], "Expecting one image file name");
1139     }
1140     filename = argv[optind++];
1141 
1142     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
1143     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1144     if (ret < 0) {
1145         error_report("Invalid cache option: %s", cache);
1146         return 1;
1147     }
1148 
1149     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1150                    false);
1151     if (!blk) {
1152         return 1;
1153     }
1154     bs = blk_bs(blk);
1155 
1156     qemu_progress_init(progress, 1.f);
1157     qemu_progress_print(0.f, 100);
1158 
1159     bdrv_graph_rdlock_main_loop();
1160     if (base) {
1161         base_bs = bdrv_find_backing_image(bs, base);
1162         if (!base_bs) {
1163             error_setg(&local_err,
1164                        "Did not find '%s' in the backing chain of '%s'",
1165                        base, filename);
1166             bdrv_graph_rdunlock_main_loop();
1167             goto done;
1168         }
1169     } else {
1170         /* This is different from QMP, which by default uses the deepest file in
1171          * the backing chain (i.e., the very base); however, the traditional
1172          * behavior of qemu-img commit is using the immediate backing file. */
1173         base_bs = bdrv_backing_chain_next(bs);
1174         if (!base_bs) {
1175             error_setg(&local_err, "Image does not have a backing file");
1176             bdrv_graph_rdunlock_main_loop();
1177             goto done;
1178         }
1179     }
1180     bdrv_graph_rdunlock_main_loop();
1181 
1182     cbi = (CommonBlockJobCBInfo){
1183         .errp = &local_err,
1184         .bs   = bs,
1185     };
1186 
1187     commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit,
1188                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1189                         &cbi, false, &local_err);
1190     if (local_err) {
1191         goto done;
1192     }
1193 
1194     /* When the block job completes, the BlockBackend reference will point to
1195      * the old backing file. In order to avoid that the top image is already
1196      * deleted, so we can still empty it afterwards, increment the reference
1197      * counter here preemptively. */
1198     if (!drop) {
1199         bdrv_ref(bs);
1200     }
1201 
1202     job = block_job_get("commit");
1203     assert(job);
1204     run_block_job(job, &local_err);
1205     if (local_err) {
1206         goto unref_backing;
1207     }
1208 
1209     if (!drop) {
1210         BlockBackend *old_backing_blk;
1211 
1212         old_backing_blk = blk_new_with_bs(bs, BLK_PERM_WRITE, BLK_PERM_ALL,
1213                                           &local_err);
1214         if (!old_backing_blk) {
1215             goto unref_backing;
1216         }
1217         ret = blk_make_empty(old_backing_blk, &local_err);
1218         blk_unref(old_backing_blk);
1219         if (ret == -ENOTSUP) {
1220             error_free(local_err);
1221             local_err = NULL;
1222         } else if (ret < 0) {
1223             goto unref_backing;
1224         }
1225     }
1226 
1227 unref_backing:
1228     if (!drop) {
1229         bdrv_unref(bs);
1230     }
1231 
1232 done:
1233     qemu_progress_end();
1234 
1235     /*
1236      * Manually inactivate the image first because this way we can know whether
1237      * an error occurred. blk_unref() doesn't tell us about failures.
1238      */
1239     ret = bdrv_inactivate_all();
1240     if (ret < 0 && !local_err) {
1241         error_setg_errno(&local_err, -ret, "Error while closing the image");
1242     }
1243     blk_unref(blk);
1244 
1245     if (local_err) {
1246         error_report_err(local_err);
1247         return 1;
1248     }
1249 
1250     qprintf(quiet, "Image committed.\n");
1251     return 0;
1252 }
1253 
1254 /*
1255  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1256  * of the first sector boundary within buf where the sector contains a
1257  * non-zero byte.  This function is robust to a buffer that is not
1258  * sector-aligned.
1259  */
1260 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1261 {
1262     int64_t i;
1263     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1264 
1265     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1266         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1267             return i;
1268         }
1269     }
1270     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1271         return i;
1272     }
1273     return -1;
1274 }
1275 
1276 /*
1277  * Returns true iff the first sector pointed to by 'buf' contains at least
1278  * a non-NUL byte.
1279  *
1280  * 'pnum' is set to the number of sectors (including and immediately following
1281  * the first one) that are known to be in the same allocated/unallocated state.
1282  * The function will try to align the end offset to alignment boundaries so
1283  * that the request will at least end aligned and consecutive requests will
1284  * also start at an aligned offset.
1285  */
1286 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1287                                 int64_t sector_num, int alignment)
1288 {
1289     bool is_zero;
1290     int i, tail;
1291 
1292     if (n <= 0) {
1293         *pnum = 0;
1294         return 0;
1295     }
1296     is_zero = buffer_is_zero(buf, BDRV_SECTOR_SIZE);
1297     for(i = 1; i < n; i++) {
1298         buf += BDRV_SECTOR_SIZE;
1299         if (is_zero != buffer_is_zero(buf, BDRV_SECTOR_SIZE)) {
1300             break;
1301         }
1302     }
1303 
1304     if (i == n) {
1305         /*
1306          * The whole buf is the same.
1307          * No reason to split it into chunks, so return now.
1308          */
1309         *pnum = i;
1310         return !is_zero;
1311     }
1312 
1313     tail = (sector_num + i) & (alignment - 1);
1314     if (tail) {
1315         if (is_zero && i <= tail) {
1316             /*
1317              * For sure next sector after i is data, and it will rewrite this
1318              * tail anyway due to RMW. So, let's just write data now.
1319              */
1320             is_zero = false;
1321         }
1322         if (!is_zero) {
1323             /* If possible, align up end offset of allocated areas. */
1324             i += alignment - tail;
1325             i = MIN(i, n);
1326         } else {
1327             /*
1328              * For sure next sector after i is data, and it will rewrite this
1329              * tail anyway due to RMW. Better is avoid RMW and write zeroes up
1330              * to aligned bound.
1331              */
1332             i -= tail;
1333         }
1334     }
1335     *pnum = i;
1336     return !is_zero;
1337 }
1338 
1339 /*
1340  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1341  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1342  * breaking up write requests for only small sparse areas.
1343  */
1344 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1345     int min, int64_t sector_num, int alignment)
1346 {
1347     int ret;
1348     int num_checked, num_used;
1349 
1350     if (n < min) {
1351         min = n;
1352     }
1353 
1354     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1355     if (!ret) {
1356         return ret;
1357     }
1358 
1359     num_used = *pnum;
1360     buf += BDRV_SECTOR_SIZE * *pnum;
1361     n -= *pnum;
1362     sector_num += *pnum;
1363     num_checked = num_used;
1364 
1365     while (n > 0) {
1366         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1367 
1368         buf += BDRV_SECTOR_SIZE * *pnum;
1369         n -= *pnum;
1370         sector_num += *pnum;
1371         num_checked += *pnum;
1372         if (ret) {
1373             num_used = num_checked;
1374         } else if (*pnum >= min) {
1375             break;
1376         }
1377     }
1378 
1379     *pnum = num_used;
1380     return 1;
1381 }
1382 
1383 /*
1384  * Compares two buffers chunk by chunk, where @chsize is the chunk size.
1385  * If @chsize is 0, default chunk size of BDRV_SECTOR_SIZE is used.
1386  * Returns 0 if the first chunk of each buffer matches, non-zero otherwise.
1387  *
1388  * @pnum is set to the size of the buffer prefix aligned to @chsize that
1389  * has the same matching status as the first chunk.
1390  */
1391 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1392                            int64_t bytes, uint64_t chsize, int64_t *pnum)
1393 {
1394     bool res;
1395     int64_t i;
1396 
1397     assert(bytes > 0);
1398 
1399     if (!chsize) {
1400         chsize = BDRV_SECTOR_SIZE;
1401     }
1402     i = MIN(bytes, chsize);
1403 
1404     res = !!memcmp(buf1, buf2, i);
1405     while (i < bytes) {
1406         int64_t len = MIN(bytes - i, chsize);
1407 
1408         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1409             break;
1410         }
1411         i += len;
1412     }
1413 
1414     *pnum = i;
1415     return res;
1416 }
1417 
/*
 * Size of each I/O buffer allocated by img_compare(), which also caps every
 * read it issues at this many bytes.
 */
1418 #define IO_BUF_SIZE (2 * MiB)
1419 
1420 /*
1421  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1422  *
1423  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1424  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1425  * failure), and 4 on error (the exit status for read errors), after emitting
1426  * an error message.
1427  *
1428  * @param blk:  BlockBackend for the image
1429  * @param offset: Starting offset to check
1430  * @param bytes: Number of bytes to check
1431  * @param filename: Name of disk file we are checking (logging purpose)
1432  * @param buffer: Allocated buffer for storing read data
1433  * @param quiet: Flag for quiet mode
1434  */
1435 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1436                                int64_t bytes, const char *filename,
1437                                uint8_t *buffer, bool quiet)
1438 {
1439     int ret = 0;
1440     int64_t idx;
1441 
1442     ret = blk_pread(blk, offset, bytes, buffer, 0);
1443     if (ret < 0) {
1444         error_report("Error while reading offset %" PRId64 " of %s: %s",
1445                      offset, filename, strerror(-ret));
1446         return 4;
1447     }
1448     idx = find_nonzero(buffer, bytes);
1449     if (idx >= 0) {
1450         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1451                 offset + idx);
1452         return 1;
1453     }
1454 
1455     return 0;
1456 }
1457 
1458 /*
1459  * Compares two images. Exit codes:
1460  *
1461  * 0 - Images are identical or the requested help was printed
1462  * 1 - Images differ
1463  * >1 - Error occurred
1464  */
/*
 * Error codes produced below: 2 for an invalid cache mode or an image that
 * cannot be opened, 3 when a block-status query fails, and 4 when an image
 * size cannot be determined or a read fails.
 *
 * Both images are walked in lockstep over their common length using block
 * status queries; data is only read where at least one side is allocated
 * and the two sides are not both reported as zero.  If the sizes differ
 * (and strict mode is off), the extra tail of the larger image must read as
 * zeroes for the images to be considered identical.
 */
1465 static int img_compare(const img_cmd_t *ccmd, int argc, char **argv)
1466 {
1467     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1468     BlockBackend *blk1, *blk2;
1469     BlockDriverState *bs1, *bs2;
1470     int64_t total_size1, total_size2;
1471     uint8_t *buf1 = NULL, *buf2 = NULL;
1472     int64_t pnum1, pnum2;
1473     int allocated1, allocated2;
1474     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1475     bool progress = false, quiet = false, strict = false;
1476     int flags;
1477     bool writethrough;
1478     int64_t total_size;
1479     int64_t offset = 0;
1480     int64_t chunk;
1481     int c;
1482     uint64_t progress_base;
1483     bool image_opts = false;
1484     bool force_share = false;
1485 
1486     cache = BDRV_DEFAULT_CACHE;
1487     for (;;) {
1488         static const struct option long_options[] = {
1489             {"help", no_argument, 0, 'h'},
1490             {"a-format", required_argument, 0, 'f'},
1491             {"b-format", required_argument, 0, 'F'},
1492             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1493             {"strict", no_argument, 0, 's'},
1494             {"cache", required_argument, 0, 'T'},
1495             {"force-share", no_argument, 0, 'U'},
1496             {"progress", no_argument, 0, 'p'},
1497             {"quiet", no_argument, 0, 'q'},
1498             {"object", required_argument, 0, OPTION_OBJECT},
1499             {0, 0, 0, 0}
1500         };
1501         c = getopt_long(argc, argv, "hf:F:sT:Upq",
1502                         long_options, NULL);
1503         if (c == -1) {
1504             break;
1505         }
1506         switch (c) {
1507         case 'h':
1508             cmd_help(ccmd,
1509 "[[-f FMT] [-F FMT] | --image-opts] [-s] [-T CACHE]\n"
1510 "        [-U] [-p] [-q] [--object OBJDEF] FILE1 FILE2\n"
1511 ,
1512 "  -f, --a-format FMT\n"
1513 "     specify FILE1 image format explicitly (default: probing is used)\n"
1514 "  -F, --b-format FMT\n"
1515 "     specify FILE2 image format explicitly (default: probing is used)\n"
1516 "  --image-opts\n"
1517 "     treat FILE1 and FILE2 as option strings (key=value,..), not file names\n"
1518 "     (incompatible with -f|--a-format and -F|--b-format)\n"
1519 "  -s, --strict\n"
1520 "     strict mode, also check if sizes are equal\n"
1521 "  -T, --cache CACHE_MODE\n"
1522 "     images caching mode (default: " BDRV_DEFAULT_CACHE ")\n"
1523 "  -U, --force-share\n"
1524 "     open images in shared mode for concurrent access\n"
1525 "  -p, --progress\n"
1526 "     display progress information\n"
1527 "  -q, --quiet\n"
1528 "     quiet mode (produce only error messages if any)\n"
1529 "  --object OBJDEF\n"
1530 "     defines QEMU user-creatable object\n"
1531 "  FILE1, FILE2\n"
1532 "     names of the image files, or option strings (key=value,..)\n"
1533 "     with --image-opts, to compare\n"
1534 );
1535             break;
1536         case 'f':
1537             fmt1 = optarg;
1538             break;
1539         case 'F':
1540             fmt2 = optarg;
1541             break;
1542         case OPTION_IMAGE_OPTS:
1543             image_opts = true;
1544             break;
1545         case 's':
1546             strict = true;
1547             break;
1548         case 'T':
1549             cache = optarg;
1550             break;
1551         case 'U':
1552             force_share = true;
1553             break;
1554         case 'p':
1555             progress = true;
1556             break;
1557         case 'q':
1558             quiet = true;
1559             break;
1560         case OPTION_OBJECT:
1561             user_creatable_process_cmdline(optarg);
1562             break;
1563         default:
1564             tryhelp(argv[0]);
1565         }
1566     }
1567 
1568     /* Progress is not shown in Quiet mode */
1569     if (quiet) {
1570         progress = false;
1571     }
1572 
1573 
1574     if (optind != argc - 2) {
1575         error_exit(argv[0], "Expecting two image file names");
1576     }
1577     filename1 = argv[optind++];
1578     filename2 = argv[optind++];
1579 
1580     /* Initialize before goto out */
1581     qemu_progress_init(progress, 2.0);
1582 
1583     flags = 0;
1584     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1585     if (ret < 0) {
1586         error_report("Invalid source cache option: %s", cache);
1587         ret = 2;
1588         goto out3;
1589     }
1590 
1591     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
1592                     force_share);
1593     if (!blk1) {
1594         ret = 2;
1595         goto out3;
1596     }
1597 
1598     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
1599                     force_share);
1600     if (!blk2) {
1601         ret = 2;
1602         goto out2;
1603     }
1604     bs1 = blk_bs(blk1);
1605     bs2 = blk_bs(blk2);
1606 
1607     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1608     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1609     total_size1 = blk_getlength(blk1);
1610     if (total_size1 < 0) {
1611         error_report("Can't get size of %s: %s",
1612                      filename1, strerror(-total_size1));
1613         ret = 4;
1614         goto out;
1615     }
1616     total_size2 = blk_getlength(blk2);
1617     if (total_size2 < 0) {
1618         error_report("Can't get size of %s: %s",
1619                      filename2, strerror(-total_size2));
1620         ret = 4;
1621         goto out;
1622     }
         /*
          * total_size is the range present in both images (compared pairwise
          * below); progress_base is the size of the larger image and is what
          * progress is measured against.
          */
1623     total_size = MIN(total_size1, total_size2);
1624     progress_base = MAX(total_size1, total_size2);
1625 
1626     qemu_progress_print(0, 100);
1627 
1628     if (strict && total_size1 != total_size2) {
1629         ret = 1;
1630         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1631         goto out;
1632     }
1633 
1634     while (offset < total_size) {
1635         int status1, status2;
1636 
1637         status1 = bdrv_block_status_above(bs1, NULL, offset,
1638                                           total_size1 - offset, &pnum1, NULL,
1639                                           NULL);
1640         if (status1 < 0) {
1641             ret = 3;
1642             error_report("Sector allocation test failed for %s", filename1);
1643             goto out;
1644         }
1645         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1646 
1647         status2 = bdrv_block_status_above(bs2, NULL, offset,
1648                                           total_size2 - offset, &pnum2, NULL,
1649                                           NULL);
1650         if (status2 < 0) {
1651             ret = 3;
1652             error_report("Sector allocation test failed for %s", filename2);
1653             goto out;
1654         }
1655         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1656 
             /* Advance by the shorter extent so both statuses cover the chunk */
1657         assert(pnum1 && pnum2);
1658         chunk = MIN(pnum1, pnum2);
1659 
1660         if (strict) {
1661             if (status1 != status2) {
1662                 ret = 1;
1663                 qprintf(quiet, "Strict mode: Offset %" PRId64
1664                         " block status mismatch!\n", offset);
1665                 goto out;
1666             }
1667         }
1668         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1669             /* nothing to do */
1670         } else if (allocated1 == allocated2) {
1671             if (allocated1) {
1672                 int64_t pnum;
1673 
1674                 chunk = MIN(chunk, IO_BUF_SIZE);
1675                 ret = blk_pread(blk1, offset, chunk, buf1, 0);
1676                 if (ret < 0) {
1677                     error_report("Error while reading offset %" PRId64
1678                                  " of %s: %s",
1679                                  offset, filename1, strerror(-ret));
1680                     ret = 4;
1681                     goto out;
1682                 }
1683                 ret = blk_pread(blk2, offset, chunk, buf2, 0);
1684                 if (ret < 0) {
1685                     error_report("Error while reading offset %" PRId64
1686                                  " of %s: %s",
1687                                  offset, filename2, strerror(-ret));
1688                     ret = 4;
1689                     goto out;
1690                 }
                     /*
                      * A non-zero result means the very first sector differs;
                      * otherwise pnum is where the first differing sector
                      * starts (or chunk if none differs).
                      */
1691                 ret = compare_buffers(buf1, buf2, chunk, 0, &pnum);
1692                 if (ret || pnum != chunk) {
1693                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1694                             offset + (ret ? 0 : pnum));
1695                     ret = 1;
1696                     goto out;
1697                 }
1698             }
1699         } else {
                 /*
                  * Only one side is allocated: it must read as zeroes.  buf1
                  * serves as the read buffer for whichever image is checked.
                  */
1700             chunk = MIN(chunk, IO_BUF_SIZE);
1701             if (allocated1) {
1702                 ret = check_empty_sectors(blk1, offset, chunk,
1703                                           filename1, buf1, quiet);
1704             } else {
1705                 ret = check_empty_sectors(blk2, offset, chunk,
1706                                           filename2, buf1, quiet);
1707             }
1708             if (ret) {
1709                 goto out;
1710             }
1711         }
1712         offset += chunk;
1713         qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1714     }
1715 
         /* Non-strict size mismatch: the larger image's tail must be zero */
1716     if (total_size1 != total_size2) {
1717         BlockBackend *blk_over;
1718         const char *filename_over;
1719 
1720         qprintf(quiet, "Warning: Image size mismatch!\n");
1721         if (total_size1 > total_size2) {
1722             blk_over = blk1;
1723             filename_over = filename1;
1724         } else {
1725             blk_over = blk2;
1726             filename_over = filename2;
1727         }
1728 
1729         while (offset < progress_base) {
1730             ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
1731                                           progress_base - offset, &chunk,
1732                                           NULL, NULL);
1733             if (ret < 0) {
1734                 ret = 3;
1735                 error_report("Sector allocation test failed for %s",
1736                              filename_over);
1737                 goto out;
1738 
1739             }
                 /* Extents that are unallocated or reported zero need no read */
1740             if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
1741                 chunk = MIN(chunk, IO_BUF_SIZE);
1742                 ret = check_empty_sectors(blk_over, offset, chunk,
1743                                           filename_over, buf1, quiet);
1744                 if (ret) {
1745                     goto out;
1746                 }
1747             }
1748             offset += chunk;
1749             qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1750         }
1751     }
1752 
1753     qprintf(quiet, "Images are identical.\n");
1754     ret = 0;
1755 
     /* Cleanup labels unwind in reverse order of acquisition */
1756 out:
1757     qemu_vfree(buf1);
1758     qemu_vfree(buf2);
1759     blk_unref(blk2);
1760 out2:
1761     blk_unref(blk1);
1762 out3:
1763     qemu_progress_end();
1764     return ret;
1765 }
1766 
1767 /* Convenience wrapper around qmp_block_dirty_bitmap_merge */
1768 static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name,
1769                                   const char *src_node, const char *src_name,
1770                                   Error **errp)
1771 {
1772     BlockDirtyBitmapOrStr *merge_src;
1773     BlockDirtyBitmapOrStrList *list = NULL;
1774 
1775     merge_src = g_new0(BlockDirtyBitmapOrStr, 1);
1776     merge_src->type = QTYPE_QDICT;
1777     merge_src->u.external.node = g_strdup(src_node);
1778     merge_src->u.external.name = g_strdup(src_name);
1779     QAPI_LIST_PREPEND(list, merge_src);
1780     qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp);
1781     qapi_free_BlockDirtyBitmapOrStrList(list);
1782 }
1783 
/*
 * Classification of a range of source sectors, as stored in
 * ImgConvertState.status by convert_iteration_sectors() and dispatched on
 * by convert_co_write().
 */
1784 enum ImgConvertBlockStatus {
1785     BLK_DATA,         /* read from source; written unless found to be zero */
1786     BLK_ZERO,         /* source reported BDRV_BLOCK_ZERO for the range */
1787     BLK_BACKING_FILE, /* left unallocated so the target's backing shows */
1788 };
1789 
/* Capacity of ImgConvertState.co[] / wait_sector_num[]; upper bound for -m */
1790 #define MAX_COROUTINES 16
/* Throttle group name passed to blk_io_limits_enable() by set_rate_limit() */
1791 #define CONVERT_THROTTLE_GROUP "img_convert"
1792 
/*
 * State of one convert operation.  It is shared by the pre-pass in
 * convert_do_copy() and by all copy coroutines running
 * convert_co_do_copy().  Positions and lengths are counted in sectors of
 * BDRV_SECTOR_SIZE bytes unless noted otherwise.
 */
1793 typedef struct ImgConvertState {
1794     BlockBackend **src;              /* source images, concatenated in order */
1795     int64_t *src_sectors;            /* length of each source image */
1796     int *src_alignment;              /* per source: alignment of status ranges */
1797     int src_num;                     /* number of source images */
1798     int64_t total_sectors;           /* end of the range to convert */
1799     int64_t allocated_sectors;       /* progress total, counted in pre-pass */
1800     int64_t allocated_done;          /* progress made so far */
1801     int64_t sector_num;              /* next sector to hand to a coroutine */
1802     int64_t wr_offs;                 /* end of the last finished ordered write */
1803     enum ImgConvertBlockStatus status; /* result of the last status query */
1804     int64_t sector_next_status;      /* first sector needing a new query */
1805     BlockBackend *target;            /* image being written */
1806     bool has_zero_init;              /* BLK_ZERO ranges need not be written */
1807     bool compressed;                 /* write with BDRV_REQ_WRITE_COMPRESSED */
1808     bool target_is_new;              /* NOTE(review): presumably "created by this run" - confirm */
1809     bool target_has_backing;         /* BLK_BACKING_FILE ranges are possible */
1810     int64_t target_backing_sectors; /* negative if unknown */
1811     bool wr_in_order;                /* writes must complete in offset order */
1812     bool copy_range;                 /* try blk_co_copy_range() for data */
1813     bool salvage;                    /* tolerate read and status errors */
1814     bool quiet;                      /* suppress salvage warnings */
1815     int min_sparse;                  /* sparse threshold; 0 = write everything */
1816     int alignment;                   /* passed to is_allocated_sectors_min() */
1817     size_t cluster_sectors;          /* write unit for compressed targets */
1818     size_t buf_sectors;              /* size of each coroutine's data buffer */
1819     long num_coroutines;             /* coroutines to start (<= MAX_COROUTINES) */
1820     int running_coroutines;          /* coroutines that have not finished */
1821     Coroutine *co[MAX_COROUTINES];   /* reset to NULL when a coroutine ends */
1822     int64_t wait_sector_num[MAX_COROUTINES]; /* awaited wr_offs, -1 if none */
1823     CoMutex lock;                    /* held while claiming the next range */
1824     int ret;                         /* -EINPROGRESS while running, then result */
1825 } ImgConvertState;
1826 
1827 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1828                                 int *src_cur, int64_t *src_cur_offset)
1829 {
1830     *src_cur = 0;
1831     *src_cur_offset = 0;
1832     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1833         *src_cur_offset += s->src_sectors[*src_cur];
1834         (*src_cur)++;
1835         assert(*src_cur < s->src_num);
1836     }
1837 }
1838 
1839 static int coroutine_mixed_fn GRAPH_RDLOCK
1840 convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1841 {
1842     int64_t src_cur_offset;
1843     int ret, n, src_cur;
1844     bool post_backing_zero = false;
1845 
1846     convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1847 
1848     assert(s->total_sectors > sector_num);
1849     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1850 
1851     if (s->target_backing_sectors >= 0) {
1852         if (sector_num >= s->target_backing_sectors) {
1853             post_backing_zero = true;
1854         } else if (sector_num + n > s->target_backing_sectors) {
1855             /* Split requests around target_backing_sectors (because
1856              * starting from there, zeros are handled differently) */
1857             n = s->target_backing_sectors - sector_num;
1858         }
1859     }
1860 
1861     if (s->sector_next_status <= sector_num) {
1862         uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
1863         int64_t count;
1864         int tail;
1865         BlockDriverState *src_bs = blk_bs(s->src[src_cur]);
1866         BlockDriverState *base;
1867 
1868         if (s->target_has_backing) {
1869             base = bdrv_cow_bs(bdrv_skip_filters(src_bs));
1870         } else {
1871             base = NULL;
1872         }
1873 
1874         do {
1875             count = n * BDRV_SECTOR_SIZE;
1876 
1877             ret = bdrv_block_status_above(src_bs, base, offset, count, &count,
1878                                           NULL, NULL);
1879 
1880             if (ret < 0) {
1881                 if (s->salvage) {
1882                     if (n == 1) {
1883                         if (!s->quiet) {
1884                             warn_report("error while reading block status at "
1885                                         "offset %" PRIu64 ": %s", offset,
1886                                         strerror(-ret));
1887                         }
1888                         /* Just try to read the data, then */
1889                         ret = BDRV_BLOCK_DATA;
1890                         count = BDRV_SECTOR_SIZE;
1891                     } else {
1892                         /* Retry on a shorter range */
1893                         n = DIV_ROUND_UP(n, 4);
1894                     }
1895                 } else {
1896                     error_report("error while reading block status at offset "
1897                                  "%" PRIu64 ": %s", offset, strerror(-ret));
1898                     return ret;
1899                 }
1900             }
1901         } while (ret < 0);
1902 
1903         n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
1904 
1905         /*
1906          * Avoid that s->sector_next_status becomes unaligned to the source
1907          * request alignment and/or cluster size to avoid unnecessary read
1908          * cycles.
1909          */
1910         tail = (sector_num - src_cur_offset + n) % s->src_alignment[src_cur];
1911         if (n > tail) {
1912             n -= tail;
1913         }
1914 
1915         if (ret & BDRV_BLOCK_ZERO) {
1916             s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
1917         } else if (ret & BDRV_BLOCK_DATA) {
1918             s->status = BLK_DATA;
1919         } else {
1920             s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
1921         }
1922 
1923         s->sector_next_status = sector_num + n;
1924     }
1925 
1926     n = MIN(n, s->sector_next_status - sector_num);
1927     if (s->status == BLK_DATA) {
1928         n = MIN(n, s->buf_sectors);
1929     }
1930 
1931     /* We need to write complete clusters for compressed images, so if an
1932      * unallocated area is shorter than that, we must consider the whole
1933      * cluster allocated. */
1934     if (s->compressed) {
1935         if (n < s->cluster_sectors) {
1936             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1937             s->status = BLK_DATA;
1938         } else {
1939             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1940         }
1941     }
1942 
1943     return n;
1944 }
1945 
1946 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1947                                         int nb_sectors, uint8_t *buf)
1948 {
1949     uint64_t single_read_until = 0;
1950     int n, ret;
1951 
1952     assert(nb_sectors <= s->buf_sectors);
1953     while (nb_sectors > 0) {
1954         BlockBackend *blk;
1955         int src_cur;
1956         int64_t bs_sectors, src_cur_offset;
1957         uint64_t offset;
1958 
1959         /* In the case of compression with multiple source files, we can get a
1960          * nb_sectors that spreads into the next part. So we must be able to
1961          * read across multiple BDSes for one convert_read() call. */
1962         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1963         blk = s->src[src_cur];
1964         bs_sectors = s->src_sectors[src_cur];
1965 
1966         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1967 
1968         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1969         if (single_read_until > offset) {
1970             n = 1;
1971         }
1972 
1973         ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1974         if (ret < 0) {
1975             if (s->salvage) {
1976                 if (n > 1) {
1977                     single_read_until = offset + (n << BDRV_SECTOR_BITS);
1978                     continue;
1979                 } else {
1980                     if (!s->quiet) {
1981                         warn_report("error while reading offset %" PRIu64
1982                                     ": %s", offset, strerror(-ret));
1983                     }
1984                     memset(buf, 0, BDRV_SECTOR_SIZE);
1985                 }
1986             } else {
1987                 return ret;
1988             }
1989         }
1990 
1991         sector_num += n;
1992         nb_sectors -= n;
1993         buf += n * BDRV_SECTOR_SIZE;
1994     }
1995 
1996     return 0;
1997 }
1998 
1999 
2000 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
2001                                          int nb_sectors, uint8_t *buf,
2002                                          enum ImgConvertBlockStatus status)
2003 {
2004     int ret;
2005 
2006     while (nb_sectors > 0) {
2007         int n = nb_sectors;
2008         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
2009 
2010         switch (status) {
2011         case BLK_BACKING_FILE:
2012             /* If we have a backing file, leave clusters unallocated that are
2013              * unallocated in the source image, so that the backing file is
2014              * visible at the respective offset. */
2015             assert(s->target_has_backing);
2016             break;
2017 
2018         case BLK_DATA:
2019             /* If we're told to keep the target fully allocated (-S 0) or there
2020              * is real non-zero data, we must write it. Otherwise we can treat
2021              * it as zero sectors.
2022              * Compressed clusters need to be written as a whole, so in that
2023              * case we can only save the write if the buffer is completely
2024              * zeroed. */
2025             if (!s->min_sparse ||
2026                 (!s->compressed &&
2027                  is_allocated_sectors_min(buf, n, &n, s->min_sparse,
2028                                           sector_num, s->alignment)) ||
2029                 (s->compressed &&
2030                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
2031             {
2032                 ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
2033                                     n << BDRV_SECTOR_BITS, buf, flags);
2034                 if (ret < 0) {
2035                     return ret;
2036                 }
2037                 break;
2038             }
2039             /* fall-through */
2040 
2041         case BLK_ZERO:
2042             if (s->has_zero_init) {
2043                 assert(!s->target_has_backing);
2044                 break;
2045             }
2046             ret = blk_co_pwrite_zeroes(s->target,
2047                                        sector_num << BDRV_SECTOR_BITS,
2048                                        n << BDRV_SECTOR_BITS,
2049                                        BDRV_REQ_MAY_UNMAP);
2050             if (ret < 0) {
2051                 return ret;
2052             }
2053             break;
2054         }
2055 
2056         sector_num += n;
2057         nb_sectors -= n;
2058         buf += n * BDRV_SECTOR_SIZE;
2059     }
2060 
2061     return 0;
2062 }
2063 
2064 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
2065                                               int nb_sectors)
2066 {
2067     int n, ret;
2068 
2069     while (nb_sectors > 0) {
2070         BlockBackend *blk;
2071         int src_cur;
2072         int64_t bs_sectors, src_cur_offset;
2073         int64_t offset;
2074 
2075         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
2076         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
2077         blk = s->src[src_cur];
2078         bs_sectors = s->src_sectors[src_cur];
2079 
2080         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
2081 
2082         ret = blk_co_copy_range(blk, offset, s->target,
2083                                 sector_num << BDRV_SECTOR_BITS,
2084                                 n << BDRV_SECTOR_BITS, 0, 0);
2085         if (ret < 0) {
2086             return ret;
2087         }
2088 
2089         sector_num += n;
2090         nb_sectors -= n;
2091     }
2092     return 0;
2093 }
2094 
2095 static void coroutine_fn convert_co_do_copy(void *opaque)
2096 {
2097     ImgConvertState *s = opaque;
2098     uint8_t *buf = NULL;
2099     int ret, i;
2100     int index = -1;
2101 
2102     for (i = 0; i < s->num_coroutines; i++) {
2103         if (s->co[i] == qemu_coroutine_self()) {
2104             index = i;
2105             break;
2106         }
2107     }
2108     assert(index >= 0);
2109 
2110     s->running_coroutines++;
2111     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
2112 
2113     while (1) {
2114         int n;
2115         int64_t sector_num;
2116         enum ImgConvertBlockStatus status;
2117         bool copy_range;
2118 
2119         qemu_co_mutex_lock(&s->lock);
2120         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
2121             qemu_co_mutex_unlock(&s->lock);
2122             break;
2123         }
2124         WITH_GRAPH_RDLOCK_GUARD() {
2125             n = convert_iteration_sectors(s, s->sector_num);
2126         }
2127         if (n < 0) {
2128             qemu_co_mutex_unlock(&s->lock);
2129             s->ret = n;
2130             break;
2131         }
2132         /* save current sector and allocation status to local variables */
2133         sector_num = s->sector_num;
2134         status = s->status;
2135         if (!s->min_sparse && s->status == BLK_ZERO) {
2136             n = MIN(n, s->buf_sectors);
2137         }
2138         /* increment global sector counter so that other coroutines can
2139          * already continue reading beyond this request */
2140         s->sector_num += n;
2141         qemu_co_mutex_unlock(&s->lock);
2142 
2143         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
2144             s->allocated_done += n;
2145             qemu_progress_print(100.0 * s->allocated_done /
2146                                         s->allocated_sectors, 0);
2147         }
2148 
2149 retry:
2150         copy_range = s->copy_range && s->status == BLK_DATA;
2151         if (status == BLK_DATA && !copy_range) {
2152             ret = convert_co_read(s, sector_num, n, buf);
2153             if (ret < 0) {
2154                 error_report("error while reading at byte %lld: %s",
2155                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2156                 s->ret = ret;
2157             }
2158         } else if (!s->min_sparse && status == BLK_ZERO) {
2159             status = BLK_DATA;
2160             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
2161         }
2162 
2163         if (s->wr_in_order) {
2164             /* keep writes in order */
2165             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
2166                 s->wait_sector_num[index] = sector_num;
2167                 qemu_coroutine_yield();
2168             }
2169             s->wait_sector_num[index] = -1;
2170         }
2171 
2172         if (s->ret == -EINPROGRESS) {
2173             if (copy_range) {
2174                 WITH_GRAPH_RDLOCK_GUARD() {
2175                     ret = convert_co_copy_range(s, sector_num, n);
2176                 }
2177                 if (ret) {
2178                     s->copy_range = false;
2179                     goto retry;
2180                 }
2181             } else {
2182                 ret = convert_co_write(s, sector_num, n, buf, status);
2183             }
2184             if (ret < 0) {
2185                 error_report("error while writing at byte %lld: %s",
2186                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2187                 s->ret = ret;
2188             }
2189         }
2190 
2191         if (s->wr_in_order) {
2192             /* reenter the coroutine that might have waited
2193              * for this write to complete */
2194             s->wr_offs = sector_num + n;
2195             for (i = 0; i < s->num_coroutines; i++) {
2196                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
2197                     /*
2198                      * A -> B -> A cannot occur because A has
2199                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
2200                      * B will never enter A during this time window.
2201                      */
2202                     qemu_coroutine_enter(s->co[i]);
2203                     break;
2204                 }
2205             }
2206         }
2207     }
2208 
2209     qemu_vfree(buf);
2210     s->co[index] = NULL;
2211     s->running_coroutines--;
2212     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
2213         /* the convert job finished successfully */
2214         s->ret = 0;
2215     }
2216 }
2217 
2218 static int convert_do_copy(ImgConvertState *s)
2219 {
2220     int ret, i, n;
2221     int64_t sector_num = 0;
2222 
2223     /* Check whether we have zero initialisation or can get it efficiently */
2224     if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
2225         !s->target_has_backing) {
2226         bdrv_graph_rdlock_main_loop();
2227         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
2228         bdrv_graph_rdunlock_main_loop();
2229     }
2230 
2231     /* Allocate buffer for copied data. For compressed images, only one cluster
2232      * can be copied at a time. */
2233     if (s->compressed) {
2234         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2235             error_report("invalid cluster size");
2236             return -EINVAL;
2237         }
2238         s->buf_sectors = s->cluster_sectors;
2239     }
2240 
2241     while (sector_num < s->total_sectors) {
2242         bdrv_graph_rdlock_main_loop();
2243         n = convert_iteration_sectors(s, sector_num);
2244         bdrv_graph_rdunlock_main_loop();
2245         if (n < 0) {
2246             return n;
2247         }
2248         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2249         {
2250             s->allocated_sectors += n;
2251         }
2252         sector_num += n;
2253     }
2254 
2255     /* Do the copy */
2256     s->sector_next_status = 0;
2257     s->ret = -EINPROGRESS;
2258 
2259     qemu_co_mutex_init(&s->lock);
2260     for (i = 0; i < s->num_coroutines; i++) {
2261         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2262         s->wait_sector_num[i] = -1;
2263         qemu_coroutine_enter(s->co[i]);
2264     }
2265 
2266     while (s->running_coroutines) {
2267         main_loop_wait(false);
2268     }
2269 
2270     if (s->compressed && !s->ret) {
2271         /* signal EOF to align */
2272         ret = blk_pwrite_compressed(s->target, 0, 0, NULL);
2273         if (ret < 0) {
2274             return ret;
2275         }
2276     }
2277 
2278     return s->ret;
2279 }
2280 
2281 /* Check that bitmaps can be copied, or output an error */
2282 static int convert_check_bitmaps(BlockDriverState *src, bool skip_broken)
2283 {
2284     BdrvDirtyBitmap *bm;
2285 
2286     if (!bdrv_supports_persistent_dirty_bitmap(src)) {
2287         error_report("Source lacks bitmap support");
2288         return -1;
2289     }
2290     FOR_EACH_DIRTY_BITMAP(src, bm) {
2291         if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2292             continue;
2293         }
2294         if (!skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2295             error_report("Cannot copy inconsistent bitmap '%s'",
2296                          bdrv_dirty_bitmap_name(bm));
2297             error_printf("Try --skip-broken-bitmaps, or "
2298                          "use 'qemu-img bitmap --remove' to delete it\n");
2299             return -1;
2300         }
2301     }
2302     return 0;
2303 }
2304 
2305 static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst,
2306                                 bool skip_broken)
2307 {
2308     BdrvDirtyBitmap *bm;
2309     Error *err = NULL;
2310 
2311     FOR_EACH_DIRTY_BITMAP(src, bm) {
2312         const char *name;
2313 
2314         if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2315             continue;
2316         }
2317         name = bdrv_dirty_bitmap_name(bm);
2318         if (skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2319             warn_report("Skipping inconsistent bitmap '%s'", name);
2320             continue;
2321         }
2322         qmp_block_dirty_bitmap_add(dst->node_name, name,
2323                                    true, bdrv_dirty_bitmap_granularity(bm),
2324                                    true, true,
2325                                    true, !bdrv_dirty_bitmap_enabled(bm),
2326                                    &err);
2327         if (err) {
2328             error_reportf_err(err, "Failed to create bitmap %s: ", name);
2329             return -1;
2330         }
2331 
2332         do_dirty_bitmap_merge(dst->node_name, name, src->node_name, name,
2333                               &err);
2334         if (err) {
2335             error_reportf_err(err, "Failed to populate bitmap %s: ", name);
2336             qmp_block_dirty_bitmap_remove(dst->node_name, name, NULL);
2337             return -1;
2338         }
2339     }
2340 
2341     return 0;
2342 }
2343 
/* Largest -S (sparse size) value accepted by img_convert(), in sectors */
2344 #define MAX_BUF_SECTORS 32768
2345 
2346 static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
2347 {
2348     ThrottleConfig cfg;
2349 
2350     throttle_config_init(&cfg);
2351     cfg.buckets[THROTTLE_BPS_WRITE].avg = rate_limit;
2352 
2353     blk_io_limits_enable(blk, CONVERT_THROTTLE_GROUP);
2354     blk_set_io_limits(blk, &cfg);
2355 }
2356 
2357 static int img_convert(const img_cmd_t *ccmd, int argc, char **argv)
2358 {
2359     int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
2360     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2361                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2362                *out_filename, *out_baseimg_param, *snapshot_name = NULL,
2363                *backing_fmt = NULL;
2364     BlockDriver *drv = NULL, *proto_drv = NULL;
2365     BlockDriverInfo bdi;
2366     BlockDriverState *out_bs;
2367     QemuOpts *opts = NULL, *sn_opts = NULL;
2368     QemuOptsList *create_opts = NULL;
2369     QDict *open_opts = NULL;
2370     char *options = NULL;
2371     Error *local_err = NULL;
2372     bool writethrough, src_writethrough, image_opts = false,
2373          skip_create = false, progress = false, tgt_image_opts = false;
2374     int64_t ret = -EINVAL;
2375     bool force_share = false;
2376     bool explict_min_sparse = false;
2377     bool bitmaps = false;
2378     bool skip_broken = false;
2379     int64_t rate_limit = 0;
2380 
2381     ImgConvertState s = (ImgConvertState) {
2382         /* Need at least 4k of zeros for sparse detection */
2383         .min_sparse         = 8,
2384         .copy_range         = false,
2385         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2386         .wr_in_order        = true,
2387         .num_coroutines     = 8,
2388     };
2389 
2390     for(;;) {
2391         static const struct option long_options[] = {
2392             {"help", no_argument, 0, 'h'},
2393             {"source-format", required_argument, 0, 'f'},
2394             /*
2395              * XXX: historic --image-opts acts on source file only,
2396              * it seems better to have it affect both source and target,
2397              * and have separate --source-image-opts for source,
2398              * but this might break existing setups.
2399              */
2400             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2401             {"source-cache", required_argument, 0, 'T'},
2402             {"snapshot", required_argument, 0, 'l'},
2403             {"bitmaps", no_argument, 0, OPTION_BITMAPS},
2404             {"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN},
2405             {"salvage", no_argument, 0, OPTION_SALVAGE},
2406             {"target-format", required_argument, 0, 'O'},
2407             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2408             {"target-format-options", required_argument, 0, 'o'},
2409             {"target-cache", required_argument, 0, 't'},
2410             {"backing", required_argument, 0, 'b'},
2411             {"backing-format", required_argument, 0, 'F'},
2412             {"sparse-size", required_argument, 0, 'S'},
2413             {"no-create", no_argument, 0, 'n'},
2414             {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2415             {"force-share", no_argument, 0, 'U'},
2416             {"rate-limit", required_argument, 0, 'r'},
2417             {"parallel", required_argument, 0, 'm'},
2418             {"oob-writes", no_argument, 0, 'W'},
2419             {"copy-range-offloading", no_argument, 0, 'C'},
2420             {"progress", no_argument, 0, 'p'},
2421             {"quiet", no_argument, 0, 'q'},
2422             {"object", required_argument, 0, OPTION_OBJECT},
2423             {0, 0, 0, 0}
2424         };
2425         c = getopt_long(argc, argv, "hf:O:b:B:CcF:o:l:S:pt:T:nm:WUr:q",
2426                         long_options, NULL);
2427         if (c == -1) {
2428             break;
2429         }
2430         switch (c) {
2431         case 'h':
2432             cmd_help(ccmd, "[-f SRC_FMT | --image-opts] [-T SRC_CACHE]\n"
2433 "        [-l SNAPSHOT] [--bitmaps [--skip-broken-bitmaps]] [--salvage]\n"
2434 "        [-O TGT_FMT | --target-image-opts] [-o TGT_FMT_OPTS] [-t TGT_CACHE]\n"
2435 "        [-b BACKING_FILE [-F BACKING_FMT]] [-S SPARSE_SIZE]\n"
2436 "        [-n] [--target-is-zero] [-c]\n"
2437 "        [-U] [-r RATE] [-m NUM_PARALLEL] [-W] [-C] [-p] [-q] [--object OBJDEF]\n"
2438 "        SRC_FILE [SRC_FILE2...] TGT_FILE\n"
2439 ,
2440 "  -f, --source-format SRC_FMT\n"
2441 "     specify format of all SRC_FILEs explicitly (default: probing is used)\n"
2442 "  --image-opts\n"
2443 "     treat each SRC_FILE as an option string (key=value,...), not a file name\n"
2444 "     (incompatible with -f|--source-format)\n"
2445 "  -T, --source-cache SRC_CACHE\n"
2446 "     source image(s) cache mode (" BDRV_DEFAULT_CACHE ")\n"
2447 "  -l, --snapshot SNAPSHOT\n"
2448 "     specify source snapshot\n"
2449 "  --bitmaps\n"
2450 "     also copy any persistent bitmaps present in source\n"
2451 "  --skip-broken-bitmaps\n"
2452 "     skip (do not error out) any broken bitmaps\n"
2453 "  --salvage\n"
2454 "     ignore errors on input (convert unreadable areas to zeros)\n"
2455 "  -O, --target-format TGT_FMT\n"
2456 "     specify TGT_FILE image format (default: raw)\n"
2457 "  --target-image-opts\n"
2458 "     treat TGT_FILE as an option string (key=value,...), not a file name\n"
2459 "     (incompatible with -O|--target-format)\n"
2460 "  -o, --target-format-options TGT_FMT_OPTS\n"
2461 "     TGT_FMT-specific options\n"
2462 "  -t, --target-cache TGT_CACHE\n"
2463 "     cache mode when opening output image (default: unsafe)\n"
2464 "  -b, --backing BACKING_FILE (was -B in <= 10.0)\n"
2465 "     create target image to be a CoW on top of BACKING_FILE\n"
2466 "  -F, --backing-format BACKING_FMT\n" /* -B used for -b in <=10.0 */
2467 "     specify BACKING_FILE image format explicitly (default: probing is used)\n"
2468 "  -S, --sparse-size SPARSE_SIZE[bkKMGTPE]\n"
2469 "     specify number of consecutive zero bytes to treat as a gap on output\n"
2470 "     (rounded down to nearest 512 bytes), with optional multiplier suffix\n"
2471 "  -n, --no-create\n"
2472 "     omit target volume creation (e.g. on rbd)\n"
2473 "  --target-is-zero\n"
2474 "     indicates that the target volume is pre-zeroed\n"
2475 "  -c, --compress\n"
2476 "     create compressed output image (qcow and qcow2 formats only)\n"
2477 "  -U, --force-share\n"
2478 "     open images in shared mode for concurrent access\n"
2479 "  -r, --rate-limit RATE\n"
2480 "     I/O rate limit, in bytes per second\n"
2481 "  -m, --parallel NUM_PARALLEL\n"
2482 "     specify parallelism (default: 8)\n"
2483 "  -C, --copy-range-offloading\n"
2484 "     try to use copy offloading\n"
2485 "  -W, --oob-writes\n"
2486 "     enable out-of-order writes to improve performance\n"
2487 "  -p, --progress\n"
2488 "     display progress information\n"
2489 "  -q, --quiet\n"
2490 "     quiet mode (produce only error messages if any)\n"
2491 "  --object OBJDEF\n"
2492 "     defines QEMU user-creatable object\n"
2493 "  SRC_FILE...\n"
2494 "     one or more source image file names,\n"
2495 "     or option strings (key=value,..) with --source-image-opts\n"
2496 "  TGT_FILE\n"
2497 "     target (output) image file name,\n"
2498 "     or option string (key=value,..) with --target-image-opts\n"
2499 );
2500             break;
2501         case 'f':
2502             fmt = optarg;
2503             break;
2504         case OPTION_IMAGE_OPTS:
2505             image_opts = true;
2506             break;
2507         case 'T':
2508             src_cache = optarg;
2509             break;
2510         case 'l':
2511             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2512                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2513                                                   optarg, false);
2514                 if (!sn_opts) {
2515                     error_report("Failed in parsing snapshot param '%s'",
2516                                  optarg);
2517                     goto fail_getopt;
2518                 }
2519             } else {
2520                 snapshot_name = optarg;
2521             }
2522             break;
2523         case OPTION_BITMAPS:
2524             bitmaps = true;
2525             break;
2526         case OPTION_SKIP_BROKEN:
2527             skip_broken = true;
2528             break;
2529         case OPTION_SALVAGE:
2530             s.salvage = true;
2531             break;
2532          case 'O':
2533             out_fmt = optarg;
2534             break;
2535         case OPTION_TARGET_IMAGE_OPTS:
2536             tgt_image_opts = true;
2537             break;
2538         case 'o':
2539             if (accumulate_options(&options, optarg) < 0) {
2540                 goto fail_getopt;
2541             }
2542             break;
2543         case 't':
2544             cache = optarg;
2545             break;
2546         case 'B': /* <=10.0 */
2547         case 'b':
2548             out_baseimg = optarg;
2549             break;
2550         case 'F': /* can't use -B as it used as -b in <=10.0 */
2551             backing_fmt = optarg;
2552             break;
2553         case 'S':
2554         {
2555             int64_t sval;
2556 
2557             sval = cvtnum("buffer size for sparse output", optarg);
2558             if (sval < 0) {
2559                 goto fail_getopt;
2560             } else if (!QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2561                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2562                 error_report("Invalid buffer size for sparse output specified. "
2563                     "Valid sizes are multiples of %llu up to %llu. Select "
2564                     "0 to disable sparse detection (fully allocates output).",
2565                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2566                 goto fail_getopt;
2567             }
2568 
2569             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2570             explict_min_sparse = true;
2571             break;
2572         }
2573         case 'n':
2574             skip_create = true;
2575             break;
2576         case OPTION_TARGET_IS_ZERO:
2577             /*
2578              * The user asserting that the target is blank has the
2579              * same effect as the target driver supporting zero
2580              * initialisation.
2581              */
2582             s.has_zero_init = true;
2583             break;
2584         case 'c':
2585             s.compressed = true;
2586             break;
2587         case 'U':
2588             force_share = true;
2589             break;
2590         case 'r':
2591             rate_limit = cvtnum("rate limit", optarg);
2592             if (rate_limit < 0) {
2593                 goto fail_getopt;
2594             }
2595             break;
2596         case 'm':
2597             if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2598                 s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2599                 error_report("Invalid number of coroutines. Allowed number of"
2600                              " coroutines is between 1 and %d", MAX_COROUTINES);
2601                 goto fail_getopt;
2602             }
2603             break;
2604         case 'W':
2605             s.wr_in_order = false;
2606             break;
2607         case 'C':
2608             s.copy_range = true;
2609             break;
2610         case 'p':
2611             progress = true;
2612             break;
2613         case 'q':
2614             s.quiet = true;
2615             break;
2616         case OPTION_OBJECT:
2617             user_creatable_process_cmdline(optarg);
2618             break;
2619         default:
2620             tryhelp(argv[0]);
2621         }
2622     }
2623 
2624     if (!out_fmt && !tgt_image_opts) {
2625         out_fmt = "raw";
2626     }
2627 
2628     if (skip_broken && !bitmaps) {
2629         error_report("Use of --skip-broken-bitmaps requires --bitmaps");
2630         goto fail_getopt;
2631     }
2632 
2633     if (s.compressed && s.copy_range) {
2634         error_report("Cannot enable copy offloading when -c is used");
2635         goto fail_getopt;
2636     }
2637 
2638     if (explict_min_sparse && s.copy_range) {
2639         error_report("Cannot enable copy offloading when -S is used");
2640         goto fail_getopt;
2641     }
2642 
2643     if (s.copy_range && s.salvage) {
2644         error_report("Cannot use copy offloading in salvaging mode");
2645         goto fail_getopt;
2646     }
2647 
2648     if (tgt_image_opts && !skip_create) {
2649         error_report("--target-image-opts requires use of -n flag");
2650         goto fail_getopt;
2651     }
2652 
2653     if (skip_create && options) {
2654         error_report("-o has no effect when skipping image creation");
2655         goto fail_getopt;
2656     }
2657 
2658     if (s.has_zero_init && !skip_create) {
2659         error_report("--target-is-zero requires use of -n flag");
2660         goto fail_getopt;
2661     }
2662 
2663     s.src_num = argc - optind - 1;
2664     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2665 
2666     if (options && has_help_option(options)) {
2667         if (out_fmt) {
2668             ret = print_block_option_help(out_filename, out_fmt);
2669             goto fail_getopt;
2670         } else {
2671             error_report("Option help requires a format be specified");
2672             goto fail_getopt;
2673         }
2674     }
2675 
2676     if (s.src_num < 1) {
2677         error_report("Must specify image file name");
2678         goto fail_getopt;
2679     }
2680 
2681     /* ret is still -EINVAL until here */
2682     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2683     if (ret < 0) {
2684         error_report("Invalid source cache option: %s", src_cache);
2685         goto fail_getopt;
2686     }
2687 
2688     /* Initialize before goto out */
2689     if (s.quiet) {
2690         progress = false;
2691     }
2692     qemu_progress_init(progress, 1.0);
2693     qemu_progress_print(0, 100);
2694 
2695     s.src = g_new0(BlockBackend *, s.src_num);
2696     s.src_sectors = g_new(int64_t, s.src_num);
2697     s.src_alignment = g_new(int, s.src_num);
2698 
2699     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2700         BlockDriverState *src_bs;
2701         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2702                                fmt, src_flags, src_writethrough, s.quiet,
2703                                force_share);
2704         if (!s.src[bs_i]) {
2705             ret = -1;
2706             goto out;
2707         }
2708         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2709         if (s.src_sectors[bs_i] < 0) {
2710             error_report("Could not get size of %s: %s",
2711                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2712             ret = -1;
2713             goto out;
2714         }
2715         src_bs = blk_bs(s.src[bs_i]);
2716         s.src_alignment[bs_i] = DIV_ROUND_UP(src_bs->bl.request_alignment,
2717                                              BDRV_SECTOR_SIZE);
2718         if (!bdrv_get_info(src_bs, &bdi)) {
2719             s.src_alignment[bs_i] = MAX(s.src_alignment[bs_i],
2720                                         bdi.cluster_size / BDRV_SECTOR_SIZE);
2721         }
2722         s.total_sectors += s.src_sectors[bs_i];
2723     }
2724 
2725     if (sn_opts) {
2726         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2727                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2728                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2729                                &local_err);
2730     } else if (snapshot_name != NULL) {
2731         if (s.src_num > 1) {
2732             error_report("No support for concatenating multiple snapshot");
2733             ret = -1;
2734             goto out;
2735         }
2736 
2737         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2738                                              &local_err);
2739     }
2740     if (local_err) {
2741         error_reportf_err(local_err, "Failed to load snapshot: ");
2742         ret = -1;
2743         goto out;
2744     }
2745 
2746     if (!skip_create) {
2747         /* Find driver and parse its options */
2748         drv = bdrv_find_format(out_fmt);
2749         if (!drv) {
2750             error_report("Unknown file format '%s'", out_fmt);
2751             ret = -1;
2752             goto out;
2753         }
2754 
2755         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2756         if (!proto_drv) {
2757             error_report_err(local_err);
2758             ret = -1;
2759             goto out;
2760         }
2761 
2762         if (!drv->create_opts) {
2763             error_report("Format driver '%s' does not support image creation",
2764                          drv->format_name);
2765             ret = -1;
2766             goto out;
2767         }
2768 
2769         if (!proto_drv->create_opts) {
2770             error_report("Protocol driver '%s' does not support image creation",
2771                          proto_drv->format_name);
2772             ret = -1;
2773             goto out;
2774         }
2775 
2776         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2777         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2778 
2779         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2780         if (options) {
2781             if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
2782                 error_report_err(local_err);
2783                 ret = -1;
2784                 goto out;
2785             }
2786         }
2787 
2788         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
2789                             s.total_sectors * BDRV_SECTOR_SIZE, &error_abort);
2790         ret = add_old_style_options(out_fmt, opts, out_baseimg, backing_fmt);
2791         if (ret < 0) {
2792             goto out;
2793         }
2794     }
2795 
2796     /* Get backing file name if -o backing_file was used */
2797     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2798     if (out_baseimg_param) {
2799         out_baseimg = out_baseimg_param;
2800     }
2801     s.target_has_backing = (bool) out_baseimg;
2802 
2803     if (s.has_zero_init && s.target_has_backing) {
2804         error_report("Cannot use --target-is-zero when the destination "
2805                      "image has a backing file");
2806         goto out;
2807     }
2808 
2809     if (s.src_num > 1 && out_baseimg) {
2810         error_report("Having a backing file for the target makes no sense when "
2811                      "concatenating multiple input images");
2812         ret = -1;
2813         goto out;
2814     }
2815 
2816     if (out_baseimg_param) {
2817         if (!qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT)) {
2818             error_report("Use of backing file requires explicit "
2819                          "backing format");
2820             ret = -1;
2821             goto out;
2822         }
2823     }
2824 
2825     /* Check if compression is supported */
2826     if (s.compressed) {
2827         bool encryption =
2828             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2829         const char *encryptfmt =
2830             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2831         const char *preallocation =
2832             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2833 
2834         if (drv && !block_driver_can_compress(drv)) {
2835             error_report("Compression not supported for this file format");
2836             ret = -1;
2837             goto out;
2838         }
2839 
2840         if (encryption || encryptfmt) {
2841             error_report("Compression and encryption not supported at "
2842                          "the same time");
2843             ret = -1;
2844             goto out;
2845         }
2846 
2847         if (preallocation
2848             && strcmp(preallocation, "off"))
2849         {
2850             error_report("Compression and preallocation not supported at "
2851                          "the same time");
2852             ret = -1;
2853             goto out;
2854         }
2855     }
2856 
2857     /* Determine if bitmaps need copying */
2858     if (bitmaps) {
2859         if (s.src_num > 1) {
2860             error_report("Copying bitmaps only possible with single source");
2861             ret = -1;
2862             goto out;
2863         }
2864         ret = convert_check_bitmaps(blk_bs(s.src[0]), skip_broken);
2865         if (ret < 0) {
2866             goto out;
2867         }
2868     }
2869 
2870     /*
2871      * The later open call will need any decryption secrets, and
2872      * bdrv_create() will purge "opts", so extract them now before
2873      * they are lost.
2874      */
2875     if (!skip_create) {
2876         open_opts = qdict_new();
2877         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2878 
2879         /* Create the new image */
2880         ret = bdrv_create(drv, out_filename, opts, &local_err);
2881         if (ret < 0) {
2882             error_reportf_err(local_err, "%s: error while converting %s: ",
2883                               out_filename, out_fmt);
2884             goto out;
2885         }
2886     }
2887 
2888     s.target_is_new = !skip_create;
2889 
2890     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2891     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2892     if (ret < 0) {
2893         error_report("Invalid cache option: %s", cache);
2894         goto out;
2895     }
2896 
2897     if (flags & BDRV_O_NOCACHE) {
2898         /*
2899          * If we open the target with O_DIRECT, it may be necessary to
2900          * extend its size to align to the physical sector size.
2901          */
2902         flags |= BDRV_O_RESIZE;
2903     }
2904 
2905     if (skip_create) {
2906         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2907                             flags, writethrough, s.quiet, false);
2908     } else {
2909         /* TODO ultimately we should allow --target-image-opts
2910          * to be used even when -n is not given.
2911          * That has to wait for bdrv_create to be improved
2912          * to allow filenames in option syntax
2913          */
2914         s.target = img_open_file(out_filename, open_opts, out_fmt,
2915                                  flags, writethrough, s.quiet, false);
2916         open_opts = NULL; /* blk_new_open will have freed it */
2917     }
2918     if (!s.target) {
2919         ret = -1;
2920         goto out;
2921     }
2922     out_bs = blk_bs(s.target);
2923 
2924     if (bitmaps && !bdrv_supports_persistent_dirty_bitmap(out_bs)) {
2925         error_report("Format driver '%s' does not support bitmaps",
2926                      out_bs->drv->format_name);
2927         ret = -1;
2928         goto out;
2929     }
2930 
2931     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2932         error_report("Compression not supported for this file format");
2933         ret = -1;
2934         goto out;
2935     }
2936 
2937     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2938      * or discard_alignment of the out_bs is greater. Limit to
2939      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2940     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2941                         MAX(s.buf_sectors,
2942                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2943                                 out_bs->bl.pdiscard_alignment >>
2944                                 BDRV_SECTOR_BITS)));
2945 
2946     /* try to align the write requests to the destination to avoid unnecessary
2947      * RMW cycles. */
2948     s.alignment = MAX(pow2floor(s.min_sparse),
2949                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2950                                    BDRV_SECTOR_SIZE));
2951     assert(is_power_of_2(s.alignment));
2952 
2953     if (skip_create) {
2954         int64_t output_sectors = blk_nb_sectors(s.target);
2955         if (output_sectors < 0) {
2956             error_report("unable to get output image length: %s",
2957                          strerror(-output_sectors));
2958             ret = -1;
2959             goto out;
2960         } else if (output_sectors < s.total_sectors) {
2961             error_report("output file is smaller than input file");
2962             ret = -1;
2963             goto out;
2964         }
2965     }
2966 
2967     if (s.target_has_backing && s.target_is_new) {
2968         /* Errors are treated as "backing length unknown" (which means
2969          * s.target_backing_sectors has to be negative, which it will
2970          * be automatically).  The backing file length is used only
2971          * for optimizations, so such a case is not fatal. */
2972         bdrv_graph_rdlock_main_loop();
2973         s.target_backing_sectors =
2974             bdrv_nb_sectors(bdrv_backing_chain_next(out_bs));
2975         bdrv_graph_rdunlock_main_loop();
2976     } else {
2977         s.target_backing_sectors = -1;
2978     }
2979 
2980     ret = bdrv_get_info(out_bs, &bdi);
2981     if (ret < 0) {
2982         if (s.compressed) {
2983             error_report("could not get block driver info");
2984             goto out;
2985         }
2986     } else {
2987         s.compressed = s.compressed || bdi.needs_compressed_writes;
2988         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2989     }
2990 
2991     if (rate_limit) {
2992         set_rate_limit(s.target, rate_limit);
2993     }
2994 
2995     ret = convert_do_copy(&s);
2996 
2997     /* Now copy the bitmaps */
2998     if (bitmaps && ret == 0) {
2999         ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs, skip_broken);
3000     }
3001 
3002 out:
3003     if (!ret) {
3004         qemu_progress_print(100, 0);
3005     }
3006     qemu_progress_end();
3007     qemu_opts_del(opts);
3008     qemu_opts_free(create_opts);
3009     qobject_unref(open_opts);
3010     blk_unref(s.target);
3011     if (s.src) {
3012         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
3013             blk_unref(s.src[bs_i]);
3014         }
3015         g_free(s.src);
3016     }
3017     g_free(s.src_sectors);
3018     g_free(s.src_alignment);
3019 fail_getopt:
3020     qemu_opts_del(sn_opts);
3021     g_free(options);
3022 
3023     return !!ret;
3024 }
3025 
3026 
3027 static void dump_snapshots(BlockDriverState *bs)
3028 {
3029     QEMUSnapshotInfo *sn_tab, *sn;
3030     int nb_sns, i;
3031 
3032     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
3033     if (nb_sns <= 0)
3034         return;
3035     printf("Snapshot list:\n");
3036     bdrv_snapshot_dump(NULL);
3037     printf("\n");
3038     for(i = 0; i < nb_sns; i++) {
3039         sn = &sn_tab[i];
3040         bdrv_snapshot_dump(sn);
3041         printf("\n");
3042     }
3043     g_free(sn_tab);
3044 }
3045 
3046 static void dump_json_block_graph_info_list(BlockGraphInfoList *list)
3047 {
3048     GString *str;
3049     QObject *obj;
3050     Visitor *v = qobject_output_visitor_new(&obj);
3051 
3052     visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort);
3053     visit_complete(v, &obj);
3054     str = qobject_to_json_pretty(obj, true);
3055     assert(str != NULL);
3056     printf("%s\n", str->str);
3057     qobject_unref(obj);
3058     visit_free(v);
3059     g_string_free(str, true);
3060 }
3061 
3062 static void dump_json_block_graph_info(BlockGraphInfo *info)
3063 {
3064     GString *str;
3065     QObject *obj;
3066     Visitor *v = qobject_output_visitor_new(&obj);
3067 
3068     visit_type_BlockGraphInfo(v, NULL, &info, &error_abort);
3069     visit_complete(v, &obj);
3070     str = qobject_to_json_pretty(obj, true);
3071     assert(str != NULL);
3072     printf("%s\n", str->str);
3073     qobject_unref(obj);
3074     visit_free(v);
3075     g_string_free(str, true);
3076 }
3077 
3078 static void dump_human_image_info(BlockGraphInfo *info, int indentation,
3079                                   const char *path)
3080 {
3081     BlockChildInfoList *children_list;
3082 
3083     bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation,
3084                         info->children == NULL);
3085 
3086     for (children_list = info->children; children_list;
3087          children_list = children_list->next)
3088     {
3089         BlockChildInfo *child = children_list->value;
3090         g_autofree char *child_path = NULL;
3091 
3092         printf("%*sChild node '%s%s':\n",
3093                indentation * 4, "", path, child->name);
3094         child_path = g_strdup_printf("%s%s/", path, child->name);
3095         dump_human_image_info(child->info, indentation + 1, child_path);
3096     }
3097 }
3098 
3099 static void dump_human_image_info_list(BlockGraphInfoList *list)
3100 {
3101     BlockGraphInfoList *elem;
3102     bool delim = false;
3103 
3104     for (elem = list; elem; elem = elem->next) {
3105         if (delim) {
3106             printf("\n");
3107         }
3108         delim = true;
3109 
3110         dump_human_image_info(elem->value, 0, "/");
3111     }
3112 }
3113 
3114 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
3115 {
3116     return strcmp(a, b) == 0;
3117 }
3118 
3119 /**
3120  * Open an image file chain and return an BlockGraphInfoList
3121  *
3122  * @filename: topmost image filename
3123  * @fmt: topmost image format (may be NULL to autodetect)
3124  * @chain: true  - enumerate entire backing file chain
3125  *         false - only topmost image file
3126  *
3127  * Returns a list of BlockNodeInfo objects or NULL if there was an error
3128  * opening an image file.  If there was an error a message will have been
3129  * printed to stderr.
3130  */
3131 static BlockGraphInfoList *collect_image_info_list(bool image_opts,
3132                                                    const char *filename,
3133                                                    const char *fmt,
3134                                                    bool chain, bool force_share)
3135 {
3136     BlockGraphInfoList *head = NULL;
3137     BlockGraphInfoList **tail = &head;
3138     GHashTable *filenames;
3139     Error *err = NULL;
3140 
3141     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
3142 
3143     while (filename) {
3144         BlockBackend *blk;
3145         BlockDriverState *bs;
3146         BlockGraphInfo *info;
3147 
3148         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
3149             error_report("Backing file '%s' creates an infinite loop.",
3150                          filename);
3151             goto err;
3152         }
3153         g_hash_table_insert(filenames, (gpointer)filename, NULL);
3154 
3155         blk = img_open(image_opts, filename, fmt,
3156                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
3157                        force_share);
3158         if (!blk) {
3159             goto err;
3160         }
3161         bs = blk_bs(blk);
3162 
3163         /*
3164          * Note that the returned BlockGraphInfo object will not have
3165          * information about this image's backing node, because we have opened
3166          * it with BDRV_O_NO_BACKING.  Printing this object will therefore not
3167          * duplicate the backing chain information that we obtain by walking
3168          * the chain manually here.
3169          */
3170         bdrv_graph_rdlock_main_loop();
3171         bdrv_query_block_graph_info(bs, &info, &err);
3172         bdrv_graph_rdunlock_main_loop();
3173 
3174         if (err) {
3175             error_report_err(err);
3176             blk_unref(blk);
3177             goto err;
3178         }
3179 
3180         QAPI_LIST_APPEND(tail, info);
3181 
3182         blk_unref(blk);
3183 
3184         /* Clear parameters that only apply to the topmost image */
3185         filename = fmt = NULL;
3186         image_opts = false;
3187 
3188         if (chain) {
3189             if (info->full_backing_filename) {
3190                 filename = info->full_backing_filename;
3191             } else if (info->backing_filename) {
3192                 error_report("Could not determine absolute backing filename,"
3193                              " but backing filename '%s' present",
3194                              info->backing_filename);
3195                 goto err;
3196             }
3197             if (info->backing_filename_format) {
3198                 fmt = info->backing_filename_format;
3199             }
3200         }
3201     }
3202     g_hash_table_destroy(filenames);
3203     return head;
3204 
3205 err:
3206     qapi_free_BlockGraphInfoList(head);
3207     g_hash_table_destroy(filenames);
3208     return NULL;
3209 }
3210 
3211 static int img_info(const img_cmd_t *ccmd, int argc, char **argv)
3212 {
3213     int c;
3214     OutputFormat output_format = OFORMAT_HUMAN;
3215     bool chain = false;
3216     const char *filename, *fmt;
3217     BlockGraphInfoList *list;
3218     bool image_opts = false;
3219     bool force_share = false;
3220 
3221     fmt = NULL;
3222     for(;;) {
3223         static const struct option long_options[] = {
3224             {"help", no_argument, 0, 'h'},
3225             {"format", required_argument, 0, 'f'},
3226             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3227             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
3228             {"force-share", no_argument, 0, 'U'},
3229             {"output", required_argument, 0, OPTION_OUTPUT},
3230             {"object", required_argument, 0, OPTION_OBJECT},
3231             {0, 0, 0, 0}
3232         };
3233         c = getopt_long(argc, argv, "hf:U", long_options, NULL);
3234         if (c == -1) {
3235             break;
3236         }
3237         switch(c) {
3238         case 'h':
3239             cmd_help(ccmd, "[-f FMT | --image-opts] [--backing-chain] [-U]\n"
3240 "        [--output human|json] [--object OBJDEF] FILE\n"
3241 ,
3242 "  -f, --format FMT\n"
3243 "     specify FILE image format explicitly (default: probing is used)\n"
3244 "  --image-opts\n"
3245 "     treat FILE as an option string (key=value,..), not a file name\n"
3246 "     (incompatible with -f|--format)\n"
3247 "  --backing-chain\n"
3248 "     display information about the backing chain for copy-on-write overlays\n"
3249 "  -U, --force-share\n"
3250 "     open image in shared mode for concurrent access\n"
3251 "  --output human|json\n"
3252 "     specify output format (default: human)\n"
3253 "  --object OBJDEF\n"
3254 "     defines QEMU user-creatable object\n"
3255 "  FILE\n"
3256 "     name of the image file, or option string (key=value,..)\n"
3257 "     with --image-opts, to operate on\n"
3258 );
3259             break;
3260         case 'f':
3261             fmt = optarg;
3262             break;
3263         case OPTION_IMAGE_OPTS:
3264             image_opts = true;
3265             break;
3266         case OPTION_BACKING_CHAIN:
3267             chain = true;
3268             break;
3269         case 'U':
3270             force_share = true;
3271             break;
3272         case OPTION_OUTPUT:
3273             output_format = parse_output_format(argv[0], optarg);
3274             break;
3275         case OPTION_OBJECT:
3276             user_creatable_process_cmdline(optarg);
3277             break;
3278         default:
3279             tryhelp(argv[0]);
3280         }
3281     }
3282     if (optind != argc - 1) {
3283         error_exit(argv[0], "Expecting one image file name");
3284     }
3285     filename = argv[optind++];
3286 
3287     list = collect_image_info_list(image_opts, filename, fmt, chain,
3288                                    force_share);
3289     if (!list) {
3290         return 1;
3291     }
3292 
3293     switch (output_format) {
3294     case OFORMAT_HUMAN:
3295         dump_human_image_info_list(list);
3296         break;
3297     case OFORMAT_JSON:
3298         if (chain) {
3299             dump_json_block_graph_info_list(list);
3300         } else {
3301             dump_json_block_graph_info(list->value);
3302         }
3303         break;
3304     }
3305 
3306     qapi_free_BlockGraphInfoList(list);
3307     return 0;
3308 }
3309 
3310 static int dump_map_entry(OutputFormat output_format, MapEntry *e,
3311                           MapEntry *next)
3312 {
3313     switch (output_format) {
3314     case OFORMAT_HUMAN:
3315         if (e->data && !e->has_offset) {
3316             error_report("File contains external, encrypted or compressed clusters.");
3317             return -1;
3318         }
3319         if (e->data && !e->zero) {
3320             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
3321                    e->start, e->length,
3322                    e->has_offset ? e->offset : 0,
3323                    e->filename ?: "");
3324         }
3325         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
3326          * Modify the flags here to allow more coalescing.
3327          */
3328         if (next && (!next->data || next->zero)) {
3329             next->data = false;
3330             next->zero = true;
3331         }
3332         break;
3333     case OFORMAT_JSON:
3334         printf("{ \"start\": %"PRId64", \"length\": %"PRId64","
3335                " \"depth\": %"PRId64", \"present\": %s, \"zero\": %s,"
3336                " \"data\": %s, \"compressed\": %s",
3337                e->start, e->length, e->depth,
3338                e->present ? "true" : "false",
3339                e->zero ? "true" : "false",
3340                e->data ? "true" : "false",
3341                e->compressed ? "true" : "false");
3342         if (e->has_offset) {
3343             printf(", \"offset\": %"PRId64"", e->offset);
3344         }
3345         putchar('}');
3346 
3347         if (next) {
3348             puts(",");
3349         }
3350         break;
3351     }
3352     return 0;
3353 }
3354 
3355 static int get_block_status(BlockDriverState *bs, int64_t offset,
3356                             int64_t bytes, MapEntry *e)
3357 {
3358     int ret;
3359     int depth;
3360     BlockDriverState *file;
3361     bool has_offset;
3362     int64_t map;
3363     char *filename = NULL;
3364 
3365     GLOBAL_STATE_CODE();
3366     GRAPH_RDLOCK_GUARD_MAINLOOP();
3367 
3368     /* As an optimization, we could cache the current range of unallocated
3369      * clusters in each file of the chain, and avoid querying the same
3370      * range repeatedly.
3371      */
3372 
3373     depth = 0;
3374     for (;;) {
3375         bs = bdrv_skip_filters(bs);
3376         ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
3377         if (ret < 0) {
3378             return ret;
3379         }
3380         assert(bytes);
3381         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
3382             break;
3383         }
3384         bs = bdrv_cow_bs(bs);
3385         if (bs == NULL) {
3386             ret = 0;
3387             break;
3388         }
3389 
3390         depth++;
3391     }
3392 
3393     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
3394 
3395     if (file && has_offset) {
3396         bdrv_refresh_filename(file);
3397         filename = file->filename;
3398     }
3399 
3400     *e = (MapEntry) {
3401         .start = offset,
3402         .length = bytes,
3403         .data = !!(ret & BDRV_BLOCK_DATA),
3404         .zero = !!(ret & BDRV_BLOCK_ZERO),
3405         .compressed = !!(ret & BDRV_BLOCK_COMPRESSED),
3406         .offset = map,
3407         .has_offset = has_offset,
3408         .depth = depth,
3409         .present = !!(ret & BDRV_BLOCK_ALLOCATED),
3410         .filename = filename,
3411     };
3412 
3413     return 0;
3414 }
3415 
3416 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
3417 {
3418     if (curr->length == 0) {
3419         return false;
3420     }
3421     if (curr->zero != next->zero ||
3422         curr->data != next->data ||
3423         curr->compressed != next->compressed ||
3424         curr->depth != next->depth ||
3425         curr->present != next->present ||
3426         !curr->filename != !next->filename ||
3427         curr->has_offset != next->has_offset) {
3428         return false;
3429     }
3430     if (curr->filename && strcmp(curr->filename, next->filename)) {
3431         return false;
3432     }
3433     if (curr->has_offset && curr->offset + curr->length != next->offset) {
3434         return false;
3435     }
3436     return true;
3437 }
3438 
3439 static int img_map(const img_cmd_t *ccmd, int argc, char **argv)
3440 {
3441     int c;
3442     OutputFormat output_format = OFORMAT_HUMAN;
3443     BlockBackend *blk;
3444     BlockDriverState *bs;
3445     const char *filename, *fmt;
3446     int64_t length;
3447     MapEntry curr = { .length = 0 }, next;
3448     int ret = 0;
3449     bool image_opts = false;
3450     bool force_share = false;
3451     int64_t start_offset = 0;
3452     int64_t max_length = -1;
3453 
3454     fmt = NULL;
3455     for (;;) {
3456         static const struct option long_options[] = {
3457             {"help", no_argument, 0, 'h'},
3458             {"format", required_argument, 0, 'f'},
3459             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3460             {"start-offset", required_argument, 0, 's'},
3461             {"max-length", required_argument, 0, 'l'},
3462             {"force-share", no_argument, 0, 'U'},
3463             {"output", required_argument, 0, OPTION_OUTPUT},
3464             {"object", required_argument, 0, OPTION_OBJECT},
3465             {0, 0, 0, 0}
3466         };
3467         c = getopt_long(argc, argv, "hf:s:l:U",
3468                         long_options, NULL);
3469         if (c == -1) {
3470             break;
3471         }
3472         switch (c) {
3473         case 'h':
3474             cmd_help(ccmd, "[-f FMT | --image-opts]\n"
3475 "        [--start-offset OFFSET] [--max-length LENGTH]\n"
3476 "        [--output human|json] [-U] [--object OBJDEF] FILE\n"
3477 ,
3478 "  -f, --format FMT\n"
3479 "     specify FILE image format explicitly (default: probing is used)\n"
3480 "  --image-opts\n"
3481 "     treat FILE as an option string (key=value,..), not a file name\n"
3482 "     (incompatible with -f|--format)\n"
3483 "  -s, --start-offset OFFSET\n"
3484 "     start at the given OFFSET in the image, not at the beginning\n"
3485 "  -l, --max-length LENGTH\n"
3486 "     process at most LENGTH bytes instead of up to the end of the image\n"
3487 "  --output human|json\n"
3488 "     specify output format name (default: human)\n"
3489 "  -U, --force-share\n"
3490 "     open image in shared mode for concurrent access\n"
3491 "  --object OBJDEF\n"
3492 "     defines QEMU user-creatable object\n"
3493 "  FILE\n"
3494 "     the image file name, or option string (key=value,..)\n"
3495 "     with --image-opts, to operate on\n"
3496 );
3497             break;
3498         case 'f':
3499             fmt = optarg;
3500             break;
3501         case OPTION_IMAGE_OPTS:
3502             image_opts = true;
3503             break;
3504         case 's':
3505             start_offset = cvtnum("start offset", optarg);
3506             if (start_offset < 0) {
3507                 return 1;
3508             }
3509             break;
3510         case 'l':
3511             max_length = cvtnum("max length", optarg);
3512             if (max_length < 0) {
3513                 return 1;
3514             }
3515             break;
3516         case OPTION_OUTPUT:
3517             output_format = parse_output_format(argv[0], optarg);
3518             break;
3519         case 'U':
3520             force_share = true;
3521             break;
3522         case OPTION_OBJECT:
3523             user_creatable_process_cmdline(optarg);
3524             break;
3525         default:
3526             tryhelp(argv[0]);
3527         }
3528     }
3529     if (optind != argc - 1) {
3530         error_exit(argv[0], "Expecting one image file name");
3531     }
3532     filename = argv[optind];
3533 
3534     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3535     if (!blk) {
3536         return 1;
3537     }
3538     bs = blk_bs(blk);
3539 
3540     if (output_format == OFORMAT_HUMAN) {
3541         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3542     } else if (output_format == OFORMAT_JSON) {
3543         putchar('[');
3544     }
3545 
3546     length = blk_getlength(blk);
3547     if (length < 0) {
3548         error_report("Failed to get size for '%s'", filename);
3549         return 1;
3550     }
3551     if (max_length != -1) {
3552         length = MIN(start_offset + max_length, length);
3553     }
3554 
3555     curr.start = start_offset;
3556     while (curr.start + curr.length < length) {
3557         int64_t offset = curr.start + curr.length;
3558         int64_t n = length - offset;
3559 
3560         ret = get_block_status(bs, offset, n, &next);
3561         if (ret < 0) {
3562             error_report("Could not read file metadata: %s", strerror(-ret));
3563             goto out;
3564         }
3565 
3566         if (entry_mergeable(&curr, &next)) {
3567             curr.length += next.length;
3568             continue;
3569         }
3570 
3571         if (curr.length > 0) {
3572             ret = dump_map_entry(output_format, &curr, &next);
3573             if (ret < 0) {
3574                 goto out;
3575             }
3576         }
3577         curr = next;
3578     }
3579 
3580     ret = dump_map_entry(output_format, &curr, NULL);
3581     if (output_format == OFORMAT_JSON) {
3582         puts("]");
3583     }
3584 
3585 out:
3586     blk_unref(blk);
3587     return ret < 0;
3588 }
3589 
/* Snapshot actions; each value is also the short option character that selects it */
3591 #define SNAPSHOT_LIST   'l'
3592 #define SNAPSHOT_CREATE 'c'
3593 #define SNAPSHOT_APPLY  'a'
3594 #define SNAPSHOT_DELETE 'd'
3595 
3596 static int img_snapshot(const img_cmd_t *ccmd, int argc, char **argv)
3597 {
3598     BlockBackend *blk;
3599     BlockDriverState *bs;
3600     QEMUSnapshotInfo sn;
3601     char *filename, *fmt = NULL, *snapshot_name = NULL;
3602     int c, ret = 0;
3603     int action = 0;
3604     bool quiet = false;
3605     Error *err = NULL;
3606     bool image_opts = false;
3607     bool force_share = false;
3608     int64_t rt;
3609 
3610     /* Parse commandline parameters */
3611     for(;;) {
3612         static const struct option long_options[] = {
3613             {"help", no_argument, 0, 'h'},
3614             {"format", required_argument, 0, 'f'},
3615             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3616             {"list", no_argument, 0, SNAPSHOT_LIST},
3617             {"apply", required_argument, 0, SNAPSHOT_APPLY},
3618             {"create", required_argument, 0, SNAPSHOT_CREATE},
3619             {"delete", required_argument, 0, SNAPSHOT_DELETE},
3620             {"force-share", no_argument, 0, 'U'},
3621             {"quiet", no_argument, 0, 'q'},
3622             {"object", required_argument, 0, OPTION_OBJECT},
3623             {0, 0, 0, 0}
3624         };
3625         c = getopt_long(argc, argv, "hf:la:c:d:Uq",
3626                         long_options, NULL);
3627         if (c == -1) {
3628             break;
3629         }
3630         switch(c) {
3631         case 'h':
3632             cmd_help(ccmd, "[-f FMT | --image-opts] [-l | -a|-c|-d SNAPSHOT]\n"
3633 "        [-U] [-q] [--object OBJDEF] FILE\n"
3634 ,
3635 "  -f, --format FMT\n"
3636 "     specify FILE format explicitly (default: probing is used)\n"
3637 "  --image-opts\n"
3638 "     treat FILE as an option string (key=value,..), not a file name\n"
3639 "     (incompatible with -f|--format)\n"
3640 "  -l, --list\n"
3641 "     list snapshots in FILE (default action if no -l|-c|-a|-d is given)\n"
3642 "  -c, --create SNAPSHOT\n"
3643 "     create named snapshot\n"
3644 "  -a, --apply SNAPSHOT\n"
3645 "     apply named snapshot to the base\n"
3646 "  -d, --delete SNAPSHOT\n"
3647 "     delete named snapshot\n"
3648 "  (only one of -l|-c|-a|-d can be specified)\n"
3649 "  -U, --force-share\n"
3650 "     open image in shared mode for concurrent access\n"
3651 "  -q, --quiet\n"
3652 "     quiet mode (produce only error messages if any)\n"
3653 "  --object OBJDEF\n"
3654 "     defines QEMU user-creatable object\n"
3655 "  FILE\n"
3656 "     name of the image file, or option string (key=value,..)\n"
3657 "     with --image-opts) to operate on\n"
3658 );
3659             break;
3660         case 'f':
3661             fmt = optarg;
3662             break;
3663         case OPTION_IMAGE_OPTS:
3664             image_opts = true;
3665             break;
3666         case SNAPSHOT_LIST:
3667         case SNAPSHOT_APPLY:
3668         case SNAPSHOT_CREATE:
3669         case SNAPSHOT_DELETE:
3670             if (action) {
3671                 error_exit(argv[0], "Cannot mix '-l', '-a', '-c', '-d'");
3672                 return 0;
3673             }
3674             action = c;
3675             snapshot_name = optarg;
3676             break;
3677         case 'U':
3678             force_share = true;
3679             break;
3680         case 'q':
3681             quiet = true;
3682             break;
3683         case OPTION_OBJECT:
3684             user_creatable_process_cmdline(optarg);
3685             break;
3686         default:
3687             tryhelp(argv[0]);
3688         }
3689     }
3690 
3691     if (optind != argc - 1) {
3692         error_exit(argv[0], "Expecting one image file name");
3693     }
3694     filename = argv[optind++];
3695 
3696     if (!action) {
3697         action = SNAPSHOT_LIST;
3698     }
3699 
3700     /* Open the image */
3701     blk = img_open(image_opts, filename, fmt,
3702                    action == SNAPSHOT_LIST ? 0 : BDRV_O_RDWR,
3703                    false, quiet, force_share);
3704     if (!blk) {
3705         return 1;
3706     }
3707     bs = blk_bs(blk);
3708 
3709     /* Perform the requested action */
3710     switch(action) {
3711     case SNAPSHOT_LIST:
3712         dump_snapshots(bs);
3713         break;
3714 
3715     case SNAPSHOT_CREATE:
3716         memset(&sn, 0, sizeof(sn));
3717         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3718 
3719         rt = g_get_real_time();
3720         sn.date_sec = rt / G_USEC_PER_SEC;
3721         sn.date_nsec = (rt % G_USEC_PER_SEC) * 1000;
3722 
3723         bdrv_graph_rdlock_main_loop();
3724         ret = bdrv_snapshot_create(bs, &sn);
3725         bdrv_graph_rdunlock_main_loop();
3726 
3727         if (ret) {
3728             error_report("Could not create snapshot '%s': %s",
3729                 snapshot_name, strerror(-ret));
3730         }
3731         break;
3732 
3733     case SNAPSHOT_APPLY:
3734         ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3735         if (ret) {
3736             error_reportf_err(err, "Could not apply snapshot '%s': ",
3737                               snapshot_name);
3738         }
3739         break;
3740 
3741     case SNAPSHOT_DELETE:
3742         bdrv_drain_all_begin();
3743         bdrv_graph_rdlock_main_loop();
3744         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3745         if (ret < 0) {
3746             error_report("Could not delete snapshot '%s': snapshot not "
3747                          "found", snapshot_name);
3748             ret = 1;
3749         } else {
3750             ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3751             if (ret < 0) {
3752                 error_reportf_err(err, "Could not delete snapshot '%s': ",
3753                                   snapshot_name);
3754                 ret = 1;
3755             }
3756         }
3757         bdrv_graph_rdunlock_main_loop();
3758         bdrv_drain_all_end();
3759         break;
3760     }
3761 
3762     /* Cleanup */
3763     blk_unref(blk);
3764     if (ret) {
3765         return 1;
3766     }
3767     return 0;
3768 }
3769 
3770 static int img_rebase(const img_cmd_t *ccmd, int argc, char **argv)
3771 {
3772     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3773     uint8_t *buf_old = NULL;
3774     uint8_t *buf_new = NULL;
3775     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3776     BlockDriverState *unfiltered_bs, *unfiltered_bs_cow;
3777     BlockDriverInfo bdi = {0};
3778     char *filename;
3779     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3780     int c, flags, src_flags, ret;
3781     BdrvRequestFlags write_flags = 0;
3782     bool writethrough, src_writethrough;
3783     int unsafe = 0;
3784     bool force_share = false;
3785     int progress = 0;
3786     bool quiet = false;
3787     bool compress = false;
3788     Error *local_err = NULL;
3789     bool image_opts = false;
3790     int64_t write_align;
3791 
3792     /* Parse commandline parameters */
3793     fmt = NULL;
3794     cache = BDRV_DEFAULT_CACHE;
3795     src_cache = BDRV_DEFAULT_CACHE;
3796     out_baseimg = NULL;
3797     out_basefmt = NULL;
3798     for(;;) {
3799         static const struct option long_options[] = {
3800             {"help", no_argument, 0, 'h'},
3801             {"format", required_argument, 0, 'f'},
3802             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3803             {"cache", required_argument, 0, 't'},
3804             {"compress", no_argument, 0, 'c'},
3805             {"backing", required_argument, 0, 'b'},
3806             {"backing-format", required_argument, 0, 'B'},
3807             {"backing-cache", required_argument, 0, 'T'},
3808             {"backing-unsafe", no_argument, 0, 'u'},
3809             {"force-share", no_argument, 0, 'U'},
3810             {"progress", no_argument, 0, 'p'},
3811             {"quiet", no_argument, 0, 'q'},
3812             {"object", required_argument, 0, OPTION_OBJECT},
3813             {0, 0, 0, 0}
3814         };
3815         c = getopt_long(argc, argv, "hf:t:cb:F:B:T:uUpq",
3816                         long_options, NULL);
3817         if (c == -1) {
3818             break;
3819         }
3820         switch (c) {
3821         case 'h':
3822             cmd_help(ccmd, "[-f FMT | --image-opts] [-t CACHE]\n"
3823 "        [-b BACKING_FILE [-B BACKING_FMT] [-T BACKING_CACHE]] [-u]\n"
3824 "        [-c] [-U] [-p] [-q] [--object OBJDEF] FILE\n"
3825 ,
3826 "  -f, --format FMT\n"
3827 "     specify FILE format explicitly (default: probing is used)\n"
3828 "  --image-opts\n"
3829 "     treat FILE as an option string (key=value,..), not a file name\n"
3830 "     (incompatible with -f|--format)\n"
3831 "  -t, --cache CACHE\n"
3832 "     cache mode for FILE (default: " BDRV_DEFAULT_CACHE ")\n"
3833 "  -b, --backing BACKING_FILE|\"\"\n"
3834 "     rebase onto this file (specify empty name for no backing file)\n"
3835 "  -B, --backing-format BACKING_FMT (was -F in <=10.0)\n"
3836 "     specify format for BACKING_FILE explicitly (default: probing is used)\n"
3837 "  -T, --backing-cache CACHE\n"
3838 "     BACKING_FILE cache mode (default: " BDRV_DEFAULT_CACHE ")\n"
3839 "  -u, --backing-unsafe\n"
3840 "     do not fail if BACKING_FILE can not be read\n"
3841 "  -c, --compress\n"
3842 "     compress image (when image supports this)\n"
3843 "  -U, --force-share\n"
3844 "     open image in shared mode for concurrent access\n"
3845 "  -p, --progress\n"
3846 "     display progress information\n"
3847 "  -q, --quiet\n"
3848 "     quiet mode (produce only error messages if any)\n"
3849 "  --object OBJDEF\n"
3850 "     defines QEMU user-creatable object\n"
3851 "  FILE\n"
3852 "     name of the image file, or option string (key=value,..)\n"
3853 "     with --image-opts, to operate on\n"
3854 );
3855             return 0;
3856         case 'f':
3857             fmt = optarg;
3858             break;
3859         case OPTION_IMAGE_OPTS:
3860             image_opts = true;
3861             break;
3862         case 't':
3863             cache = optarg;
3864             break;
3865         case 'b':
3866             out_baseimg = optarg;
3867             break;
3868         case 'F': /* <=10.0 */
3869         case 'B':
3870             out_basefmt = optarg;
3871             break;
3872         case 'u':
3873             unsafe = 1;
3874             break;
3875         case 'c':
3876             compress = true;
3877             break;
3878         case 'U':
3879             force_share = true;
3880             break;
3881         case 'p':
3882             progress = 1;
3883             break;
3884         case 'T':
3885             src_cache = optarg;
3886             break;
3887         case 'q':
3888             quiet = true;
3889             break;
3890         case OPTION_OBJECT:
3891             user_creatable_process_cmdline(optarg);
3892             break;
3893         default:
3894             tryhelp(argv[0]);
3895         }
3896     }
3897 
3898     if (quiet) {
3899         progress = 0;
3900     }
3901 
3902     if (optind != argc - 1) {
3903         error_exit(argv[0], "Expecting one image file name");
3904     }
3905     if (!unsafe && !out_baseimg) {
3906         error_exit(argv[0],
3907                    "Must specify backing file (-b) or use unsafe mode (-u)");
3908     }
3909     filename = argv[optind++];
3910 
3911     qemu_progress_init(progress, 2.0);
3912     qemu_progress_print(0, 100);
3913 
3914     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3915     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3916     if (ret < 0) {
3917         error_report("Invalid cache option: %s", cache);
3918         goto out;
3919     }
3920 
3921     src_flags = 0;
3922     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3923     if (ret < 0) {
3924         error_report("Invalid source cache option: %s", src_cache);
3925         goto out;
3926     }
3927 
3928     /* The source files are opened read-only, don't care about WCE */
3929     assert((src_flags & BDRV_O_RDWR) == 0);
3930     (void) src_writethrough;
3931 
3932     /*
3933      * Open the images.
3934      *
3935      * Ignore the old backing file for unsafe rebase in case we want to correct
3936      * the reference to a renamed or moved backing file.
3937      */
3938     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3939                    false);
3940     if (!blk) {
3941         ret = -1;
3942         goto out;
3943     }
3944     bs = blk_bs(blk);
3945 
3946     bdrv_graph_rdlock_main_loop();
3947     unfiltered_bs = bdrv_skip_filters(bs);
3948     unfiltered_bs_cow = bdrv_cow_bs(unfiltered_bs);
3949     bdrv_graph_rdunlock_main_loop();
3950 
3951     if (compress && !block_driver_can_compress(unfiltered_bs->drv)) {
3952         error_report("Compression not supported for this file format");
3953         ret = -1;
3954         goto out;
3955     } else if (compress) {
3956         write_flags |= BDRV_REQ_WRITE_COMPRESSED;
3957     }
3958 
3959     if (out_basefmt != NULL) {
3960         if (bdrv_find_format(out_basefmt) == NULL) {
3961             error_report("Invalid format name: '%s'", out_basefmt);
3962             ret = -1;
3963             goto out;
3964         }
3965     }
3966 
3967     /*
3968      * We need overlay subcluster size (or cluster size in case writes are
3969      * compressed) to make sure write requests are aligned.
3970      */
3971     ret = bdrv_get_info(unfiltered_bs, &bdi);
3972     if (ret < 0) {
3973         error_report("could not get block driver info");
3974         goto out;
3975     } else if (bdi.subcluster_size == 0) {
3976         bdi.cluster_size = bdi.subcluster_size = 1;
3977     }
3978 
3979     write_align = compress ? bdi.cluster_size : bdi.subcluster_size;
3980 
3981     /* For safe rebasing we need to compare old and new backing file */
3982     if (!unsafe) {
3983         QDict *options = NULL;
3984         BlockDriverState *base_bs;
3985 
3986         bdrv_graph_rdlock_main_loop();
3987         base_bs = bdrv_cow_bs(unfiltered_bs);
3988         bdrv_graph_rdunlock_main_loop();
3989 
3990         if (base_bs) {
3991             blk_old_backing = blk_new(qemu_get_aio_context(),
3992                                       BLK_PERM_CONSISTENT_READ,
3993                                       BLK_PERM_ALL);
3994             ret = blk_insert_bs(blk_old_backing, base_bs,
3995                                 &local_err);
3996             if (ret < 0) {
3997                 error_reportf_err(local_err,
3998                                   "Could not reuse old backing file '%s': ",
3999                                   base_bs->filename);
4000                 goto out;
4001             }
4002         } else {
4003             blk_old_backing = NULL;
4004         }
4005 
4006         if (out_baseimg[0]) {
4007             const char *overlay_filename;
4008             char *out_real_path;
4009 
4010             options = qdict_new();
4011             if (out_basefmt) {
4012                 qdict_put_str(options, "driver", out_basefmt);
4013             }
4014             if (force_share) {
4015                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
4016             }
4017 
4018             bdrv_graph_rdlock_main_loop();
4019             bdrv_refresh_filename(bs);
4020             bdrv_graph_rdunlock_main_loop();
4021             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
4022                                                      : bs->filename;
4023             out_real_path =
4024                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
4025                                                              out_baseimg,
4026                                                              &local_err);
4027             if (local_err) {
4028                 qobject_unref(options);
4029                 error_reportf_err(local_err,
4030                                   "Could not resolve backing filename: ");
4031                 ret = -1;
4032                 goto out;
4033             }
4034 
4035             /*
4036              * Find out whether we rebase an image on top of a previous image
4037              * in its chain.
4038              */
4039             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
4040             if (prefix_chain_bs) {
4041                 qobject_unref(options);
4042                 g_free(out_real_path);
4043 
4044                 blk_new_backing = blk_new(qemu_get_aio_context(),
4045                                           BLK_PERM_CONSISTENT_READ,
4046                                           BLK_PERM_ALL);
4047                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
4048                                     &local_err);
4049                 if (ret < 0) {
4050                     error_reportf_err(local_err,
4051                                       "Could not reuse backing file '%s': ",
4052                                       out_baseimg);
4053                     goto out;
4054                 }
4055             } else {
4056                 blk_new_backing = blk_new_open(out_real_path, NULL,
4057                                                options, src_flags, &local_err);
4058                 g_free(out_real_path);
4059                 if (!blk_new_backing) {
4060                     error_reportf_err(local_err,
4061                                       "Could not open new backing file '%s': ",
4062                                       out_baseimg);
4063                     ret = -1;
4064                     goto out;
4065                 }
4066             }
4067         }
4068     }
4069 
4070     /*
4071      * Check each unallocated cluster in the COW file. If it is unallocated,
4072      * accesses go to the backing file. We must therefore compare this cluster
4073      * in the old and new backing file, and if they differ we need to copy it
4074      * from the old backing file into the COW file.
4075      *
4076      * If qemu-img crashes during this step, no harm is done. The content of
4077      * the image is the same as the original one at any time.
4078      */
4079     if (!unsafe) {
4080         int64_t size;
4081         int64_t old_backing_size = 0;
4082         int64_t new_backing_size = 0;
4083         uint64_t offset;
4084         int64_t n, n_old = 0, n_new = 0;
4085         float local_progress = 0;
4086 
4087         if (blk_old_backing && bdrv_opt_mem_align(blk_bs(blk_old_backing)) >
4088             bdrv_opt_mem_align(blk_bs(blk))) {
4089             buf_old = blk_blockalign(blk_old_backing, IO_BUF_SIZE);
4090         } else {
4091             buf_old = blk_blockalign(blk, IO_BUF_SIZE);
4092         }
4093         buf_new = blk_blockalign(blk_new_backing, IO_BUF_SIZE);
4094 
4095         size = blk_getlength(blk);
4096         if (size < 0) {
4097             error_report("Could not get size of '%s': %s",
4098                          filename, strerror(-size));
4099             ret = -1;
4100             goto out;
4101         }
4102         if (blk_old_backing) {
4103             old_backing_size = blk_getlength(blk_old_backing);
4104             if (old_backing_size < 0) {
4105                 char backing_name[PATH_MAX];
4106 
4107                 bdrv_get_backing_filename(bs, backing_name,
4108                                           sizeof(backing_name));
4109                 error_report("Could not get size of '%s': %s",
4110                              backing_name, strerror(-old_backing_size));
4111                 ret = -1;
4112                 goto out;
4113             }
4114         }
4115         if (blk_new_backing) {
4116             new_backing_size = blk_getlength(blk_new_backing);
4117             if (new_backing_size < 0) {
4118                 error_report("Could not get size of '%s': %s",
4119                              out_baseimg, strerror(-new_backing_size));
4120                 ret = -1;
4121                 goto out;
4122             }
4123         }
4124 
4125         if (size != 0) {
4126             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
4127         }
4128 
4129         for (offset = 0; offset < size; offset += n) {
4130             bool old_backing_eof = false;
4131             int64_t n_alloc;
4132 
4133             /* How many bytes can we handle with the next read? */
4134             n = MIN(IO_BUF_SIZE, size - offset);
4135 
4136             /* If the cluster is allocated, we don't need to take action */
4137             ret = bdrv_is_allocated(unfiltered_bs, offset, n, &n);
4138             if (ret < 0) {
4139                 error_report("error while reading image metadata: %s",
4140                              strerror(-ret));
4141                 goto out;
4142             }
4143             if (ret) {
4144                 continue;
4145             }
4146 
4147             if (prefix_chain_bs) {
4148                 uint64_t bytes = n;
4149 
4150                 /*
4151                  * If cluster wasn't changed since prefix_chain, we don't need
4152                  * to take action
4153                  */
4154                 ret = bdrv_is_allocated_above(unfiltered_bs_cow,
4155                                               prefix_chain_bs, false,
4156                                               offset, n, &n);
4157                 if (ret < 0) {
4158                     error_report("error while reading image metadata: %s",
4159                                  strerror(-ret));
4160                     goto out;
4161                 }
4162                 if (!ret && n) {
4163                     continue;
4164                 }
4165                 if (!n) {
4166                     /*
4167                      * If we've reached EOF of the old backing, it means that
4168                      * offsets beyond the old backing size were read as zeroes.
4169                      * Now we will need to explicitly zero the cluster in
4170                      * order to preserve that state after the rebase.
4171                      */
4172                     n = bytes;
4173                 }
4174             }
4175 
4176             /*
4177              * At this point we know that the region [offset; offset + n)
4178              * is unallocated within the target image.  This region might be
4179              * unaligned to the target image's (sub)cluster boundaries, as
4180              * old backing may have smaller clusters (or have subclusters).
4181              * We extend it to the aligned boundaries to avoid CoW on
4182              * partial writes in blk_pwrite(),
4183              */
4184             n += offset - QEMU_ALIGN_DOWN(offset, write_align);
4185             offset = QEMU_ALIGN_DOWN(offset, write_align);
4186             n += QEMU_ALIGN_UP(offset + n, write_align) - (offset + n);
4187             n = MIN(n, size - offset);
4188             assert(!bdrv_is_allocated(unfiltered_bs, offset, n, &n_alloc) &&
4189                    n_alloc == n);
4190 
4191             /*
4192              * Much like with the target image, we'll try to read as much
4193              * of the old and new backings as we can.
4194              */
4195             n_old = MIN(n, MAX(0, old_backing_size - (int64_t) offset));
4196             n_new = MIN(n, MAX(0, new_backing_size - (int64_t) offset));
4197 
4198             /*
4199              * Read old and new backing file and take into consideration that
4200              * backing files may be smaller than the COW image.
4201              */
4202             memset(buf_old + n_old, 0, n - n_old);
4203             if (!n_old) {
4204                 old_backing_eof = true;
4205             } else {
4206                 ret = blk_pread(blk_old_backing, offset, n_old, buf_old, 0);
4207                 if (ret < 0) {
4208                     error_report("error while reading from old backing file");
4209                     goto out;
4210                 }
4211             }
4212 
4213             memset(buf_new + n_new, 0, n - n_new);
4214             if (n_new) {
4215                 ret = blk_pread(blk_new_backing, offset, n_new, buf_new, 0);
4216                 if (ret < 0) {
4217                     error_report("error while reading from new backing file");
4218                     goto out;
4219                 }
4220             }
4221 
4222             /* If they differ, we need to write to the COW file */
4223             uint64_t written = 0;
4224 
4225             while (written < n) {
4226                 int64_t pnum;
4227 
4228                 if (compare_buffers(buf_old + written, buf_new + written,
4229                                     n - written, write_align, &pnum))
4230                 {
4231                     if (old_backing_eof) {
4232                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
4233                     } else {
4234                         assert(written + pnum <= IO_BUF_SIZE);
4235                         ret = blk_pwrite(blk, offset + written, pnum,
4236                                          buf_old + written, write_flags);
4237                     }
4238                     if (ret < 0) {
4239                         error_report("Error while writing to COW image: %s",
4240                             strerror(-ret));
4241                         goto out;
4242                     }
4243                 }
4244 
4245                 written += pnum;
4246                 if (offset + written >= old_backing_size) {
4247                     old_backing_eof = true;
4248                 }
4249             }
4250             qemu_progress_print(local_progress, 100);
4251         }
4252     }
4253 
4254     /*
4255      * Change the backing file. All clusters that are different from the old
4256      * backing file are overwritten in the COW file now, so the visible content
4257      * doesn't change when we switch the backing file.
4258      */
4259     if (out_baseimg && *out_baseimg) {
4260         ret = bdrv_change_backing_file(unfiltered_bs, out_baseimg, out_basefmt,
4261                                        true);
4262     } else {
4263         ret = bdrv_change_backing_file(unfiltered_bs, NULL, NULL, false);
4264     }
4265 
4266     if (ret == -ENOSPC) {
4267         error_report("Could not change the backing file to '%s': No "
4268                      "space left in the file header", out_baseimg);
4269     } else if (ret == -EINVAL && out_baseimg && !out_basefmt) {
4270         error_report("Could not change the backing file to '%s': backing "
4271                      "format must be specified", out_baseimg);
4272     } else if (ret < 0) {
4273         error_report("Could not change the backing file to '%s': %s",
4274             out_baseimg, strerror(-ret));
4275     }
4276 
4277     qemu_progress_print(100, 0);
4278     /*
4279      * TODO At this point it is possible to check if any clusters that are
4280      * allocated in the COW file are the same in the backing file. If so, they
4281      * could be dropped from the COW file. Don't do this before switching the
4282      * backing file, in case of a crash this would lead to corruption.
4283      */
4284 out:
4285     qemu_progress_end();
4286     /* Cleanup */
4287     if (!unsafe) {
4288         blk_unref(blk_old_backing);
4289         blk_unref(blk_new_backing);
4290     }
4291     qemu_vfree(buf_old);
4292     qemu_vfree(buf_new);
4293 
4294     blk_unref(blk);
4295     if (ret) {
4296         return 1;
4297     }
4298     return 0;
4299 }
4300 
4301 static int img_resize(const img_cmd_t *ccmd, int argc, char **argv)
4302 {
4303     Error *err = NULL;
4304     int c, ret, relative;
4305     const char *filename, *fmt, *size;
4306     int64_t n, total_size, current_size;
4307     bool quiet = false;
4308     BlockBackend *blk = NULL;
4309     PreallocMode prealloc = PREALLOC_MODE_OFF;
4310     QemuOpts *param;
4311 
4312     static QemuOptsList resize_options = {
4313         .name = "resize_options",
4314         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
4315         .desc = {
4316             {
4317                 .name = BLOCK_OPT_SIZE,
4318                 .type = QEMU_OPT_SIZE,
4319                 .help = "Virtual disk size"
4320             }, {
4321                 /* end of list */
4322             }
4323         },
4324     };
4325     bool image_opts = false;
4326     bool shrink = false;
4327 
4328     /* Remove size from argv manually so that negative numbers are not treated
4329      * as options by getopt. */
4330     if (argc < 3) {
4331         error_exit(argv[0], "Not enough arguments");
4332         return 1;
4333     }
4334 
4335     size = argv[--argc];
4336 
4337     /* Parse getopt arguments */
4338     fmt = NULL;
4339     for(;;) {
4340         static const struct option long_options[] = {
4341             {"help", no_argument, 0, 'h'},
4342             {"object", required_argument, 0, OPTION_OBJECT},
4343             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4344             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
4345             {"shrink", no_argument, 0, OPTION_SHRINK},
4346             {0, 0, 0, 0}
4347         };
4348         c = getopt_long(argc, argv, ":f:hq",
4349                         long_options, NULL);
4350         if (c == -1) {
4351             break;
4352         }
4353         switch(c) {
4354         case ':':
4355             missing_argument(argv[optind - 1]);
4356             break;
4357         case '?':
4358             unrecognized_option(argv[optind - 1]);
4359             break;
4360         case 'h':
4361             help();
4362             break;
4363         case 'f':
4364             fmt = optarg;
4365             break;
4366         case 'q':
4367             quiet = true;
4368             break;
4369         case OPTION_OBJECT:
4370             user_creatable_process_cmdline(optarg);
4371             break;
4372         case OPTION_IMAGE_OPTS:
4373             image_opts = true;
4374             break;
4375         case OPTION_PREALLOCATION:
4376             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
4377                                        PREALLOC_MODE__MAX, NULL);
4378             if (prealloc == PREALLOC_MODE__MAX) {
4379                 error_report("Invalid preallocation mode '%s'", optarg);
4380                 return 1;
4381             }
4382             break;
4383         case OPTION_SHRINK:
4384             shrink = true;
4385             break;
4386         }
4387     }
4388     if (optind != argc - 1) {
4389         error_exit(argv[0], "Expecting image file name and size");
4390     }
4391     filename = argv[optind++];
4392 
4393     /* Choose grow, shrink, or absolute resize mode */
4394     switch (size[0]) {
4395     case '+':
4396         relative = 1;
4397         size++;
4398         break;
4399     case '-':
4400         relative = -1;
4401         size++;
4402         break;
4403     default:
4404         relative = 0;
4405         break;
4406     }
4407 
4408     /* Parse size */
4409     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
4410     if (!qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err)) {
4411         error_report_err(err);
4412         ret = -1;
4413         qemu_opts_del(param);
4414         goto out;
4415     }
4416     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
4417     qemu_opts_del(param);
4418 
4419     blk = img_open(image_opts, filename, fmt,
4420                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
4421                    false);
4422     if (!blk) {
4423         ret = -1;
4424         goto out;
4425     }
4426 
4427     current_size = blk_getlength(blk);
4428     if (current_size < 0) {
4429         error_report("Failed to inquire current image length: %s",
4430                      strerror(-current_size));
4431         ret = -1;
4432         goto out;
4433     }
4434 
4435     if (relative) {
4436         total_size = current_size + n * relative;
4437     } else {
4438         total_size = n;
4439     }
4440     if (total_size <= 0) {
4441         error_report("New image size must be positive");
4442         ret = -1;
4443         goto out;
4444     }
4445 
4446     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
4447         error_report("Preallocation can only be used for growing images");
4448         ret = -1;
4449         goto out;
4450     }
4451 
4452     if (total_size < current_size && !shrink) {
4453         error_report("Use the --shrink option to perform a shrink operation.");
4454         warn_report("Shrinking an image will delete all data beyond the "
4455                     "shrunken image's end. Before performing such an "
4456                     "operation, make sure there is no important data there.");
4457         ret = -1;
4458         goto out;
4459     }
4460 
4461     /*
4462      * The user expects the image to have the desired size after
4463      * resizing, so pass @exact=true.  It is of no use to report
4464      * success when the image has not actually been resized.
4465      */
4466     ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
4467     if (!ret) {
4468         qprintf(quiet, "Image resized.\n");
4469     } else {
4470         error_report_err(err);
4471     }
4472 out:
4473     blk_unref(blk);
4474     if (ret) {
4475         return 1;
4476     }
4477     return 0;
4478 }
4479 
4480 static void amend_status_cb(BlockDriverState *bs,
4481                             int64_t offset, int64_t total_work_size,
4482                             void *opaque)
4483 {
4484     qemu_progress_print(100.f * offset / total_work_size, 0);
4485 }
4486 
4487 static int print_amend_option_help(const char *format)
4488 {
4489     BlockDriver *drv;
4490 
4491     GRAPH_RDLOCK_GUARD_MAINLOOP();
4492 
4493     /* Find driver and parse its options */
4494     drv = bdrv_find_format(format);
4495     if (!drv) {
4496         error_report("Unknown file format '%s'", format);
4497         return 1;
4498     }
4499 
4500     if (!drv->bdrv_amend_options) {
4501         error_report("Format driver '%s' does not support option amendment",
4502                      format);
4503         return 1;
4504     }
4505 
4506     /* Every driver supporting amendment must have amend_opts */
4507     assert(drv->amend_opts);
4508 
4509     printf("Amend options for '%s':\n", format);
4510     qemu_opts_print_help(drv->amend_opts, false);
4511     return 0;
4512 }
4513 
4514 static int img_amend(const img_cmd_t *ccmd, int argc, char **argv)
4515 {
4516     Error *err = NULL;
4517     int c, ret = 0;
4518     char *options = NULL;
4519     QemuOptsList *amend_opts = NULL;
4520     QemuOpts *opts = NULL;
4521     const char *fmt = NULL, *filename, *cache;
4522     int flags;
4523     bool writethrough;
4524     bool quiet = false, progress = false;
4525     BlockBackend *blk = NULL;
4526     BlockDriverState *bs = NULL;
4527     bool image_opts = false;
4528     bool force = false;
4529 
4530     cache = BDRV_DEFAULT_CACHE;
4531     for (;;) {
4532         static const struct option long_options[] = {
4533             {"help", no_argument, 0, 'h'},
4534             {"object", required_argument, 0, OPTION_OBJECT},
4535             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4536             {"force", no_argument, 0, OPTION_FORCE},
4537             {0, 0, 0, 0}
4538         };
4539         c = getopt_long(argc, argv, ":ho:f:t:pq",
4540                         long_options, NULL);
4541         if (c == -1) {
4542             break;
4543         }
4544 
4545         switch (c) {
4546         case ':':
4547             missing_argument(argv[optind - 1]);
4548             break;
4549         case '?':
4550             unrecognized_option(argv[optind - 1]);
4551             break;
4552         case 'h':
4553             help();
4554             break;
4555         case 'o':
4556             if (accumulate_options(&options, optarg) < 0) {
4557                 ret = -1;
4558                 goto out_no_progress;
4559             }
4560             break;
4561         case 'f':
4562             fmt = optarg;
4563             break;
4564         case 't':
4565             cache = optarg;
4566             break;
4567         case 'p':
4568             progress = true;
4569             break;
4570         case 'q':
4571             quiet = true;
4572             break;
4573         case OPTION_OBJECT:
4574             user_creatable_process_cmdline(optarg);
4575             break;
4576         case OPTION_IMAGE_OPTS:
4577             image_opts = true;
4578             break;
4579         case OPTION_FORCE:
4580             force = true;
4581             break;
4582         }
4583     }
4584 
4585     if (!options) {
4586         error_exit(argv[0], "Must specify options (-o)");
4587     }
4588 
4589     if (quiet) {
4590         progress = false;
4591     }
4592     qemu_progress_init(progress, 1.0);
4593 
4594     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4595     if (fmt && has_help_option(options)) {
4596         /* If a format is explicitly specified (and possibly no filename is
4597          * given), print option help here */
4598         ret = print_amend_option_help(fmt);
4599         goto out;
4600     }
4601 
4602     if (optind != argc - 1) {
4603         error_report("Expecting one image file name");
4604         ret = -1;
4605         goto out;
4606     }
4607 
4608     flags = BDRV_O_RDWR;
4609     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4610     if (ret < 0) {
4611         error_report("Invalid cache option: %s", cache);
4612         goto out;
4613     }
4614 
4615     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4616                    false);
4617     if (!blk) {
4618         ret = -1;
4619         goto out;
4620     }
4621     bs = blk_bs(blk);
4622 
4623     fmt = bs->drv->format_name;
4624 
4625     if (has_help_option(options)) {
4626         /* If the format was auto-detected, print option help here */
4627         ret = print_amend_option_help(fmt);
4628         goto out;
4629     }
4630 
4631     bdrv_graph_rdlock_main_loop();
4632     if (!bs->drv->bdrv_amend_options) {
4633         error_report("Format driver '%s' does not support option amendment",
4634                      fmt);
4635         bdrv_graph_rdunlock_main_loop();
4636         ret = -1;
4637         goto out;
4638     }
4639 
4640     /* Every driver supporting amendment must have amend_opts */
4641     assert(bs->drv->amend_opts);
4642 
4643     amend_opts = qemu_opts_append(amend_opts, bs->drv->amend_opts);
4644     opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4645     if (!qemu_opts_do_parse(opts, options, NULL, &err)) {
4646         /* Try to parse options using the create options */
4647         amend_opts = qemu_opts_append(amend_opts, bs->drv->create_opts);
4648         qemu_opts_del(opts);
4649         opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4650         if (qemu_opts_do_parse(opts, options, NULL, NULL)) {
4651             error_append_hint(&err,
4652                               "This option is only supported for image creation\n");
4653         }
4654 
4655         bdrv_graph_rdunlock_main_loop();
4656         error_report_err(err);
4657         ret = -1;
4658         goto out;
4659     }
4660 
4661     /* In case the driver does not call amend_status_cb() */
4662     qemu_progress_print(0.f, 0);
4663     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, force, &err);
4664     qemu_progress_print(100.f, 0);
4665     bdrv_graph_rdunlock_main_loop();
4666 
4667     if (ret < 0) {
4668         error_report_err(err);
4669         goto out;
4670     }
4671 
4672 out:
4673     qemu_progress_end();
4674 
4675 out_no_progress:
4676     blk_unref(blk);
4677     qemu_opts_del(opts);
4678     qemu_opts_free(amend_opts);
4679     g_free(options);
4680 
4681     if (ret) {
4682         return 1;
4683     }
4684     return 0;
4685 }
4686 
/*
 * State of one "bench" run, shared between img_bench() and the request
 * completion callback bench_cb().
 */
typedef struct BenchData {
    BlockBackend *blk;      /* backend the requests are issued on */
    uint64_t image_size;    /* image length, used to wrap the offset */
    bool write;             /* issue writes instead of reads */
    int bufsize;            /* size of a single request in bytes */
    int step;               /* distance between consecutive request offsets */
    int nrreq;              /* maximum number of requests in flight */
    int n;                  /* requests that have not completed yet */
    int flush_interval;     /* flush after this many requests, 0 = never */
    bool drain_on_flush;    /* wait for in-flight requests before flushing */
    uint8_t *buf;           /* data buffer of nrreq * bufsize bytes */
    QEMUIOVector *qiov;     /* nrreq vectors, each covering one buf slice */

    int in_flight;          /* requests submitted but not yet completed */
    bool in_flush;          /* a flush with drained queue is in progress */
    uint64_t offset;        /* offset of the next request to submit */
} BenchData;
4704 
/*
 * Completion callback for flushes that are issued without draining the
 * request queue first.  Nothing to do on success; a failed flush ends the
 * benchmark immediately.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4712 
/*
 * Completion callback of the benchmark and, at the same time, the function
 * that submits new requests.
 *
 * @opaque is the BenchData of the run, @ret the result of the request (or
 * flush) that just finished.  img_bench() also calls it once directly with
 * ret == 0 and nothing in flight to submit the first batch.
 *
 * Any failed request terminates the process.
 */
static void bench_cb(void *opaque, int ret)
{
    BenchData *b = opaque;
    BlockAIOCB *acb;

    if (ret < 0) {
        error_report("Failed request: %s", strerror(-ret));
        exit(EXIT_FAILURE);
    }

    if (b->in_flush) {
        /* Just finished a flush with drained queue: Start next requests */
        assert(b->in_flight == 0);
        b->in_flush = false;
    } else if (b->in_flight > 0) {
        /* Requests not yet submitted, sampled before this completion is
         * accounted for */
        int remaining = b->n - b->in_flight;

        b->n--;
        b->in_flight--;

        /* Time for flush? Drain queue if requested, then flush */
        if (b->flush_interval && remaining % b->flush_interval == 0) {
            if (!b->in_flight || !b->drain_on_flush) {
                BlockCompletionFunc *cb;

                if (b->drain_on_flush) {
                    /* Queue is empty now; resume submitting in the
                     * flush completion */
                    b->in_flush = true;
                    cb = bench_cb;
                } else {
                    cb = bench_undrained_flush_cb;
                }

                acb = blk_aio_flush(b->blk, cb, b);
                if (!acb) {
                    error_report("Failed to issue flush request");
                    exit(EXIT_FAILURE);
                }
            }
            if (b->drain_on_flush) {
                /* Do not submit anything new while draining or flushing */
                return;
            }
        }
    }

    /* Refill the queue up to nrreq, but never beyond the requests left */
    while (b->n > b->in_flight && b->in_flight < b->nrreq) {
        int64_t offset = b->offset;
        /* blk_aio_* might look for completed I/Os and kick bench_cb
         * again, so make sure this operation is counted by in_flight
         * and b->offset is ready for the next submission.
         */
        b->in_flight++;
        b->offset += b->step;
        if (b->image_size <= b->bufsize) {
            /* Image holds at most one buffer: always start at 0 (this also
             * keeps the modulus below from being zero) */
            b->offset = 0;
        } else {
            b->offset %= b->image_size - b->bufsize;
        }
        /* Note that b->qiov, i.e. the first vector, is passed each time */
        if (b->write) {
            acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
        } else {
            acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
        }
        if (!acb) {
            error_report("Failed to issue request");
            exit(EXIT_FAILURE);
        }
    }
}
4781 
4782 static int img_bench(const img_cmd_t *ccmd, int argc, char **argv)
4783 {
4784     int c, ret = 0;
4785     const char *fmt = NULL, *filename;
4786     bool quiet = false;
4787     bool image_opts = false;
4788     bool is_write = false;
4789     int count = 75000;
4790     int depth = 64;
4791     int64_t offset = 0;
4792     size_t bufsize = 4096;
4793     int pattern = 0;
4794     size_t step = 0;
4795     int flush_interval = 0;
4796     bool drain_on_flush = true;
4797     int64_t image_size;
4798     BlockBackend *blk = NULL;
4799     BenchData data = {};
4800     int flags = 0;
4801     bool writethrough = false;
4802     struct timeval t1, t2;
4803     int i;
4804     bool force_share = false;
4805     size_t buf_size = 0;
4806 
4807     for (;;) {
4808         static const struct option long_options[] = {
4809             {"help", no_argument, 0, 'h'},
4810             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4811             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4812             {"pattern", required_argument, 0, OPTION_PATTERN},
4813             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4814             {"force-share", no_argument, 0, 'U'},
4815             {0, 0, 0, 0}
4816         };
4817         c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
4818                         NULL);
4819         if (c == -1) {
4820             break;
4821         }
4822 
4823         switch (c) {
4824         case ':':
4825             missing_argument(argv[optind - 1]);
4826             break;
4827         case '?':
4828             unrecognized_option(argv[optind - 1]);
4829             break;
4830         case 'h':
4831             help();
4832             break;
4833         case 'c':
4834         {
4835             unsigned long res;
4836 
4837             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4838                 error_report("Invalid request count specified");
4839                 return 1;
4840             }
4841             count = res;
4842             break;
4843         }
4844         case 'd':
4845         {
4846             unsigned long res;
4847 
4848             if (qemu_strtoul(optarg, NULL, 0, &res) <= 0 || res > INT_MAX) {
4849                 error_report("Invalid queue depth specified");
4850                 return 1;
4851             }
4852             depth = res;
4853             break;
4854         }
4855         case 'f':
4856             fmt = optarg;
4857             break;
4858         case 'n':
4859             flags |= BDRV_O_NATIVE_AIO;
4860             break;
4861         case 'i':
4862             ret = bdrv_parse_aio(optarg, &flags);
4863             if (ret < 0) {
4864                 error_report("Invalid aio option: %s", optarg);
4865                 ret = -1;
4866                 goto out;
4867             }
4868             break;
4869         case 'o':
4870         {
4871             offset = cvtnum("offset", optarg);
4872             if (offset < 0) {
4873                 return 1;
4874             }
4875             break;
4876         }
4877             break;
4878         case 'q':
4879             quiet = true;
4880             break;
4881         case 's':
4882         {
4883             int64_t sval;
4884 
4885             sval = cvtnum_full("buffer size", optarg, 0, INT_MAX);
4886             if (sval < 0) {
4887                 return 1;
4888             }
4889 
4890             bufsize = sval;
4891             break;
4892         }
4893         case 'S':
4894         {
4895             int64_t sval;
4896 
4897             sval = cvtnum_full("step_size", optarg, 0, INT_MAX);
4898             if (sval < 0) {
4899                 return 1;
4900             }
4901 
4902             step = sval;
4903             break;
4904         }
4905         case 't':
4906             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4907             if (ret < 0) {
4908                 error_report("Invalid cache mode");
4909                 ret = -1;
4910                 goto out;
4911             }
4912             break;
4913         case 'w':
4914             flags |= BDRV_O_RDWR;
4915             is_write = true;
4916             break;
4917         case 'U':
4918             force_share = true;
4919             break;
4920         case OPTION_PATTERN:
4921         {
4922             unsigned long res;
4923 
4924             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4925                 error_report("Invalid pattern byte specified");
4926                 return 1;
4927             }
4928             pattern = res;
4929             break;
4930         }
4931         case OPTION_FLUSH_INTERVAL:
4932         {
4933             unsigned long res;
4934 
4935             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4936                 error_report("Invalid flush interval specified");
4937                 return 1;
4938             }
4939             flush_interval = res;
4940             break;
4941         }
4942         case OPTION_NO_DRAIN:
4943             drain_on_flush = false;
4944             break;
4945         case OPTION_IMAGE_OPTS:
4946             image_opts = true;
4947             break;
4948         }
4949     }
4950 
4951     if (optind != argc - 1) {
4952         error_exit(argv[0], "Expecting one image file name");
4953     }
4954     filename = argv[argc - 1];
4955 
4956     if (!is_write && flush_interval) {
4957         error_report("--flush-interval is only available in write tests");
4958         ret = -1;
4959         goto out;
4960     }
4961     if (flush_interval && flush_interval < depth) {
4962         error_report("Flush interval can't be smaller than depth");
4963         ret = -1;
4964         goto out;
4965     }
4966 
4967     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4968                    force_share);
4969     if (!blk) {
4970         ret = -1;
4971         goto out;
4972     }
4973 
4974     image_size = blk_getlength(blk);
4975     if (image_size < 0) {
4976         ret = image_size;
4977         goto out;
4978     }
4979 
4980     data = (BenchData) {
4981         .blk            = blk,
4982         .image_size     = image_size,
4983         .bufsize        = bufsize,
4984         .step           = step ?: bufsize,
4985         .nrreq          = depth,
4986         .n              = count,
4987         .offset         = offset,
4988         .write          = is_write,
4989         .flush_interval = flush_interval,
4990         .drain_on_flush = drain_on_flush,
4991     };
4992     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4993            "(starting at offset %" PRId64 ", step size %d)\n",
4994            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4995            data.offset, data.step);
4996     if (flush_interval) {
4997         printf("Sending flush every %d requests\n", flush_interval);
4998     }
4999 
5000     buf_size = data.nrreq * data.bufsize;
5001     data.buf = blk_blockalign(blk, buf_size);
5002     memset(data.buf, pattern, data.nrreq * data.bufsize);
5003 
5004     blk_register_buf(blk, data.buf, buf_size, &error_fatal);
5005 
5006     data.qiov = g_new(QEMUIOVector, data.nrreq);
5007     for (i = 0; i < data.nrreq; i++) {
5008         qemu_iovec_init(&data.qiov[i], 1);
5009         qemu_iovec_add(&data.qiov[i],
5010                        data.buf + i * data.bufsize, data.bufsize);
5011     }
5012 
5013     gettimeofday(&t1, NULL);
5014     bench_cb(&data, 0);
5015 
5016     while (data.n > 0) {
5017         main_loop_wait(false);
5018     }
5019     gettimeofday(&t2, NULL);
5020 
5021     printf("Run completed in %3.3f seconds.\n",
5022            (t2.tv_sec - t1.tv_sec)
5023            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
5024 
5025 out:
5026     if (data.buf) {
5027         blk_unregister_buf(blk, data.buf, buf_size);
5028     }
5029     qemu_vfree(data.buf);
5030     blk_unref(blk);
5031 
5032     if (ret) {
5033         return 1;
5034     }
5035     return 0;
5036 }
5037 
/*
 * Operations of the "bitmap" subcommand.  Each value is queued by the
 * command line option of the same name (--add, --remove, ...).
 */
enum ImgBitmapAct {
    BITMAP_ADD,
    BITMAP_REMOVE,
    BITMAP_CLEAR,
    BITMAP_ENABLE,
    BITMAP_DISABLE,
    BITMAP_MERGE,
};
/*
 * One queued bitmap operation.  img_bitmap() appends an entry per action
 * option and later executes the queue in command line order.
 */
typedef struct ImgBitmapAction {
    enum ImgBitmapAct act;
    const char *src; /* only used for merge */
    QSIMPLEQ_ENTRY(ImgBitmapAction) next;
} ImgBitmapAction;
5051 
5052 static int img_bitmap(const img_cmd_t *ccmd, int argc, char **argv)
5053 {
5054     Error *err = NULL;
5055     int c, ret = 1;
5056     QemuOpts *opts = NULL;
5057     const char *fmt = NULL, *src_fmt = NULL, *src_filename = NULL;
5058     const char *filename, *bitmap;
5059     BlockBackend *blk = NULL, *src = NULL;
5060     BlockDriverState *bs = NULL, *src_bs = NULL;
5061     bool image_opts = false;
5062     int64_t granularity = 0;
5063     bool add = false, merge = false;
5064     QSIMPLEQ_HEAD(, ImgBitmapAction) actions;
5065     ImgBitmapAction *act, *act_next;
5066     const char *op;
5067     int inactivate_ret;
5068 
5069     QSIMPLEQ_INIT(&actions);
5070 
5071     for (;;) {
5072         static const struct option long_options[] = {
5073             {"help", no_argument, 0, 'h'},
5074             {"object", required_argument, 0, OPTION_OBJECT},
5075             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5076             {"add", no_argument, 0, OPTION_ADD},
5077             {"remove", no_argument, 0, OPTION_REMOVE},
5078             {"clear", no_argument, 0, OPTION_CLEAR},
5079             {"enable", no_argument, 0, OPTION_ENABLE},
5080             {"disable", no_argument, 0, OPTION_DISABLE},
5081             {"merge", required_argument, 0, OPTION_MERGE},
5082             {"granularity", required_argument, 0, 'g'},
5083             {"source-file", required_argument, 0, 'b'},
5084             {"source-format", required_argument, 0, 'F'},
5085             {0, 0, 0, 0}
5086         };
5087         c = getopt_long(argc, argv, ":b:f:F:g:h", long_options, NULL);
5088         if (c == -1) {
5089             break;
5090         }
5091 
5092         switch (c) {
5093         case ':':
5094             missing_argument(argv[optind - 1]);
5095             break;
5096         case '?':
5097             unrecognized_option(argv[optind - 1]);
5098             break;
5099         case 'h':
5100             help();
5101             break;
5102         case 'b':
5103             src_filename = optarg;
5104             break;
5105         case 'f':
5106             fmt = optarg;
5107             break;
5108         case 'F':
5109             src_fmt = optarg;
5110             break;
5111         case 'g':
5112             granularity = cvtnum("granularity", optarg);
5113             if (granularity < 0) {
5114                 return 1;
5115             }
5116             break;
5117         case OPTION_ADD:
5118             act = g_new0(ImgBitmapAction, 1);
5119             act->act = BITMAP_ADD;
5120             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5121             add = true;
5122             break;
5123         case OPTION_REMOVE:
5124             act = g_new0(ImgBitmapAction, 1);
5125             act->act = BITMAP_REMOVE;
5126             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5127             break;
5128         case OPTION_CLEAR:
5129             act = g_new0(ImgBitmapAction, 1);
5130             act->act = BITMAP_CLEAR;
5131             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5132             break;
5133         case OPTION_ENABLE:
5134             act = g_new0(ImgBitmapAction, 1);
5135             act->act = BITMAP_ENABLE;
5136             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5137             break;
5138         case OPTION_DISABLE:
5139             act = g_new0(ImgBitmapAction, 1);
5140             act->act = BITMAP_DISABLE;
5141             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5142             break;
5143         case OPTION_MERGE:
5144             act = g_new0(ImgBitmapAction, 1);
5145             act->act = BITMAP_MERGE;
5146             act->src = optarg;
5147             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5148             merge = true;
5149             break;
5150         case OPTION_OBJECT:
5151             user_creatable_process_cmdline(optarg);
5152             break;
5153         case OPTION_IMAGE_OPTS:
5154             image_opts = true;
5155             break;
5156         }
5157     }
5158 
5159     if (QSIMPLEQ_EMPTY(&actions)) {
5160         error_report("Need at least one of --add, --remove, --clear, "
5161                      "--enable, --disable, or --merge");
5162         goto out;
5163     }
5164 
5165     if (granularity && !add) {
5166         error_report("granularity only supported with --add");
5167         goto out;
5168     }
5169     if (src_fmt && !src_filename) {
5170         error_report("-F only supported with -b");
5171         goto out;
5172     }
5173     if (src_filename && !merge) {
5174         error_report("Merge bitmap source file only supported with "
5175                      "--merge");
5176         goto out;
5177     }
5178 
5179     if (optind != argc - 2) {
5180         error_report("Expecting filename and bitmap name");
5181         goto out;
5182     }
5183 
5184     filename = argv[optind];
5185     bitmap = argv[optind + 1];
5186 
5187     /*
5188      * No need to open backing chains; we will be manipulating bitmaps
5189      * directly in this image without reference to image contents.
5190      */
5191     blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR | BDRV_O_NO_BACKING,
5192                    false, false, false);
5193     if (!blk) {
5194         goto out;
5195     }
5196     bs = blk_bs(blk);
5197     if (src_filename) {
5198         src = img_open(false, src_filename, src_fmt, BDRV_O_NO_BACKING,
5199                        false, false, false);
5200         if (!src) {
5201             goto out;
5202         }
5203         src_bs = blk_bs(src);
5204     } else {
5205         src_bs = bs;
5206     }
5207 
5208     QSIMPLEQ_FOREACH_SAFE(act, &actions, next, act_next) {
5209         switch (act->act) {
5210         case BITMAP_ADD:
5211             qmp_block_dirty_bitmap_add(bs->node_name, bitmap,
5212                                        !!granularity, granularity, true, true,
5213                                        false, false, &err);
5214             op = "add";
5215             break;
5216         case BITMAP_REMOVE:
5217             qmp_block_dirty_bitmap_remove(bs->node_name, bitmap, &err);
5218             op = "remove";
5219             break;
5220         case BITMAP_CLEAR:
5221             qmp_block_dirty_bitmap_clear(bs->node_name, bitmap, &err);
5222             op = "clear";
5223             break;
5224         case BITMAP_ENABLE:
5225             qmp_block_dirty_bitmap_enable(bs->node_name, bitmap, &err);
5226             op = "enable";
5227             break;
5228         case BITMAP_DISABLE:
5229             qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err);
5230             op = "disable";
5231             break;
5232         case BITMAP_MERGE:
5233             do_dirty_bitmap_merge(bs->node_name, bitmap, src_bs->node_name,
5234                                   act->src, &err);
5235             op = "merge";
5236             break;
5237         default:
5238             g_assert_not_reached();
5239         }
5240 
5241         if (err) {
5242             error_reportf_err(err, "Operation %s on bitmap %s failed: ",
5243                               op, bitmap);
5244             goto out;
5245         }
5246         g_free(act);
5247     }
5248 
5249     ret = 0;
5250 
5251  out:
5252     /*
5253      * Manually inactivate the images first because this way we can know whether
5254      * an error occurred. blk_unref() doesn't tell us about failures.
5255      */
5256     inactivate_ret = bdrv_inactivate_all();
5257     if (inactivate_ret < 0) {
5258         error_report("Error while closing the image: %s", strerror(-inactivate_ret));
5259         ret = 1;
5260     }
5261 
5262     blk_unref(src);
5263     blk_unref(blk);
5264     qemu_opts_del(opts);
5265     return ret;
5266 }
5267 
/*
 * One bit per dd operand (octal constants).  NOTE(review): presumably
 * combined in DdInfo.flags and matched against DdOpts.flag; the code doing
 * so is not part of this excerpt.
 */
#define C_BS      01
#define C_COUNT   02
#define C_IF      04
#define C_OF      010
#define C_SKIP    020
5273 
/* Settings of a dd run that do not belong to either the input or output */
struct DdInfo {
    unsigned int flags; /* presumably a mask of C_* bits - TODO confirm */
    int64_t count;      /* value of the "count" operand, see img_dd_count() */
};
5278 
/* Description of one side (input or output) of a dd run */
struct DdIo {
    int bsz;    /* Block size */
    char *filename; /* g_strdup()ed copy of the "if" / "of" operand */
    uint8_t *buf;
    int64_t offset; /* for the input: value of the "skip" operand */
};
5285 
/*
 * Describes one dd operand: its name, the handler parsing its value and an
 * associated flag.  The handler signature is the one shared by the
 * img_dd_*() functions, which return 0 on success and 1 on error.
 */
struct DdOpts {
    const char *name;
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    unsigned int flag;
};
5291 
5292 static int img_dd_bs(const char *arg,
5293                      struct DdIo *in, struct DdIo *out,
5294                      struct DdInfo *dd)
5295 {
5296     int64_t res;
5297 
5298     res = cvtnum_full("bs", arg, 1, INT_MAX);
5299 
5300     if (res < 0) {
5301         return 1;
5302     }
5303     in->bsz = out->bsz = res;
5304 
5305     return 0;
5306 }
5307 
5308 static int img_dd_count(const char *arg,
5309                         struct DdIo *in, struct DdIo *out,
5310                         struct DdInfo *dd)
5311 {
5312     dd->count = cvtnum("count", arg);
5313 
5314     if (dd->count < 0) {
5315         return 1;
5316     }
5317 
5318     return 0;
5319 }
5320 
5321 static int img_dd_if(const char *arg,
5322                      struct DdIo *in, struct DdIo *out,
5323                      struct DdInfo *dd)
5324 {
5325     in->filename = g_strdup(arg);
5326 
5327     return 0;
5328 }
5329 
5330 static int img_dd_of(const char *arg,
5331                      struct DdIo *in, struct DdIo *out,
5332                      struct DdInfo *dd)
5333 {
5334     out->filename = g_strdup(arg);
5335 
5336     return 0;
5337 }
5338 
5339 static int img_dd_skip(const char *arg,
5340                        struct DdIo *in, struct DdIo *out,
5341                        struct DdInfo *dd)
5342 {
5343     in->offset = cvtnum("skip", arg);
5344 
5345     if (in->offset < 0) {
5346         return 1;
5347     }
5348 
5349     return 0;
5350 }
5351 
5352 static int img_dd(const img_cmd_t *ccmd, int argc, char **argv)
5353 {
5354     int ret = 0;
5355     char *arg = NULL;
5356     char *tmp;
5357     BlockDriver *drv = NULL, *proto_drv = NULL;
5358     BlockBackend *blk1 = NULL, *blk2 = NULL;
5359     QemuOpts *opts = NULL;
5360     QemuOptsList *create_opts = NULL;
5361     Error *local_err = NULL;
5362     bool image_opts = false;
5363     int c, i;
5364     const char *out_fmt = "raw";
5365     const char *fmt = NULL;
5366     int64_t size = 0;
5367     int64_t out_pos, in_pos;
5368     bool force_share = false;
5369     struct DdInfo dd = {
5370         .flags = 0,
5371         .count = 0,
5372     };
5373     struct DdIo in = {
5374         .bsz = 512, /* Block size is by default 512 bytes */
5375         .filename = NULL,
5376         .buf = NULL,
5377         .offset = 0
5378     };
5379     struct DdIo out = {
5380         .bsz = 512,
5381         .filename = NULL,
5382         .buf = NULL,
5383         .offset = 0
5384     };
5385 
5386     const struct DdOpts options[] = {
5387         { "bs", img_dd_bs, C_BS },
5388         { "count", img_dd_count, C_COUNT },
5389         { "if", img_dd_if, C_IF },
5390         { "of", img_dd_of, C_OF },
5391         { "skip", img_dd_skip, C_SKIP },
5392         { NULL, NULL, 0 }
5393     };
5394     const struct option long_options[] = {
5395         { "help", no_argument, 0, 'h'},
5396         { "object", required_argument, 0, OPTION_OBJECT},
5397         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5398         { "force-share", no_argument, 0, 'U'},
5399         { 0, 0, 0, 0 }
5400     };
5401 
5402     while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
5403         if (c == EOF) {
5404             break;
5405         }
5406         switch (c) {
5407         case 'O':
5408             out_fmt = optarg;
5409             break;
5410         case 'f':
5411             fmt = optarg;
5412             break;
5413         case ':':
5414             missing_argument(argv[optind - 1]);
5415             break;
5416         case '?':
5417             unrecognized_option(argv[optind - 1]);
5418             break;
5419         case 'h':
5420             help();
5421             break;
5422         case 'U':
5423             force_share = true;
5424             break;
5425         case OPTION_OBJECT:
5426             user_creatable_process_cmdline(optarg);
5427             break;
5428         case OPTION_IMAGE_OPTS:
5429             image_opts = true;
5430             break;
5431         }
5432     }
5433 
5434     for (i = optind; i < argc; i++) {
5435         int j;
5436         arg = g_strdup(argv[i]);
5437 
5438         tmp = strchr(arg, '=');
5439         if (tmp == NULL) {
5440             error_report("unrecognized operand %s", arg);
5441             ret = -1;
5442             goto out;
5443         }
5444 
5445         *tmp++ = '\0';
5446 
5447         for (j = 0; options[j].name != NULL; j++) {
5448             if (!strcmp(arg, options[j].name)) {
5449                 break;
5450             }
5451         }
5452         if (options[j].name == NULL) {
5453             error_report("unrecognized operand %s", arg);
5454             ret = -1;
5455             goto out;
5456         }
5457 
5458         if (options[j].f(tmp, &in, &out, &dd) != 0) {
5459             ret = -1;
5460             goto out;
5461         }
5462         dd.flags |= options[j].flag;
5463         g_free(arg);
5464         arg = NULL;
5465     }
5466 
5467     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
5468         error_report("Must specify both input and output files");
5469         ret = -1;
5470         goto out;
5471     }
5472 
5473     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
5474                     force_share);
5475 
5476     if (!blk1) {
5477         ret = -1;
5478         goto out;
5479     }
5480 
5481     drv = bdrv_find_format(out_fmt);
5482     if (!drv) {
5483         error_report("Unknown file format");
5484         ret = -1;
5485         goto out;
5486     }
5487     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
5488 
5489     if (!proto_drv) {
5490         error_report_err(local_err);
5491         ret = -1;
5492         goto out;
5493     }
5494     if (!drv->create_opts) {
5495         error_report("Format driver '%s' does not support image creation",
5496                      drv->format_name);
5497         ret = -1;
5498         goto out;
5499     }
5500     if (!proto_drv->create_opts) {
5501         error_report("Protocol driver '%s' does not support image creation",
5502                      proto_drv->format_name);
5503         ret = -1;
5504         goto out;
5505     }
5506     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5507     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
5508 
5509     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5510 
5511     size = blk_getlength(blk1);
5512     if (size < 0) {
5513         error_report("Failed to get size for '%s'", in.filename);
5514         ret = -1;
5515         goto out;
5516     }
5517 
5518     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
5519         dd.count * in.bsz < size) {
5520         size = dd.count * in.bsz;
5521     }
5522 
5523     /* Overflow means the specified offset is beyond input image's size */
5524     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5525                               size < in.bsz * in.offset)) {
5526         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
5527     } else {
5528         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
5529                             size - in.bsz * in.offset, &error_abort);
5530     }
5531 
5532     ret = bdrv_create(drv, out.filename, opts, &local_err);
5533     if (ret < 0) {
5534         error_reportf_err(local_err,
5535                           "%s: error while creating output image: ",
5536                           out.filename);
5537         ret = -1;
5538         goto out;
5539     }
5540 
5541     /* TODO, we can't honour --image-opts for the target,
5542      * since it needs to be given in a format compatible
5543      * with the bdrv_create() call above which does not
5544      * support image-opts style.
5545      */
5546     blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
5547                          false, false, false);
5548 
5549     if (!blk2) {
5550         ret = -1;
5551         goto out;
5552     }
5553 
5554     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5555                               size < in.offset * in.bsz)) {
5556         /* We give a warning if the skip option is bigger than the input
5557          * size and create an empty output disk image (i.e. like dd(1)).
5558          */
5559         error_report("%s: cannot skip to specified offset", in.filename);
5560         in_pos = size;
5561     } else {
5562         in_pos = in.offset * in.bsz;
5563     }
5564 
5565     in.buf = g_new(uint8_t, in.bsz);
5566 
5567     for (out_pos = 0; in_pos < size; ) {
5568         int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
5569 
5570         ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
5571         if (ret < 0) {
5572             error_report("error while reading from input image file: %s",
5573                          strerror(-ret));
5574             goto out;
5575         }
5576         in_pos += bytes;
5577 
5578         ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
5579         if (ret < 0) {
5580             error_report("error while writing to output image file: %s",
5581                          strerror(-ret));
5582             goto out;
5583         }
5584         out_pos += bytes;
5585     }
5586 
5587 out:
5588     g_free(arg);
5589     qemu_opts_del(opts);
5590     qemu_opts_free(create_opts);
5591     blk_unref(blk1);
5592     blk_unref(blk2);
5593     g_free(in.filename);
5594     g_free(out.filename);
5595     g_free(in.buf);
5596     g_free(out.buf);
5597 
5598     if (ret) {
5599         return 1;
5600     }
5601     return 0;
5602 }
5603 
5604 static void dump_json_block_measure_info(BlockMeasureInfo *info)
5605 {
5606     GString *str;
5607     QObject *obj;
5608     Visitor *v = qobject_output_visitor_new(&obj);
5609 
5610     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
5611     visit_complete(v, &obj);
5612     str = qobject_to_json_pretty(obj, true);
5613     assert(str != NULL);
5614     printf("%s\n", str->str);
5615     qobject_unref(obj);
5616     visit_free(v);
5617     g_string_free(str, true);
5618 }
5619 
5620 static int img_measure(const img_cmd_t *ccmd, int argc, char **argv)
5621 {
5622     static const struct option long_options[] = {
5623         {"help", no_argument, 0, 'h'},
5624         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5625         {"object", required_argument, 0, OPTION_OBJECT},
5626         {"output", required_argument, 0, OPTION_OUTPUT},
5627         {"size", required_argument, 0, OPTION_SIZE},
5628         {"force-share", no_argument, 0, 'U'},
5629         {0, 0, 0, 0}
5630     };
5631     OutputFormat output_format = OFORMAT_HUMAN;
5632     BlockBackend *in_blk = NULL;
5633     BlockDriver *drv;
5634     const char *filename = NULL;
5635     const char *fmt = NULL;
5636     const char *out_fmt = "raw";
5637     char *options = NULL;
5638     char *snapshot_name = NULL;
5639     bool force_share = false;
5640     QemuOpts *opts = NULL;
5641     QemuOpts *object_opts = NULL;
5642     QemuOpts *sn_opts = NULL;
5643     QemuOptsList *create_opts = NULL;
5644     bool image_opts = false;
5645     int64_t img_size = -1;
5646     BlockMeasureInfo *info = NULL;
5647     Error *local_err = NULL;
5648     int ret = 1;
5649     int c;
5650 
5651     while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
5652                             long_options, NULL)) != -1) {
5653         switch (c) {
5654         case '?':
5655         case 'h':
5656             help();
5657             break;
5658         case 'f':
5659             fmt = optarg;
5660             break;
5661         case 'O':
5662             out_fmt = optarg;
5663             break;
5664         case 'o':
5665             if (accumulate_options(&options, optarg) < 0) {
5666                 goto out;
5667             }
5668             break;
5669         case 'l':
5670             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
5671                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
5672                                                   optarg, false);
5673                 if (!sn_opts) {
5674                     error_report("Failed in parsing snapshot param '%s'",
5675                                  optarg);
5676                     goto out;
5677                 }
5678             } else {
5679                 snapshot_name = optarg;
5680             }
5681             break;
5682         case 'U':
5683             force_share = true;
5684             break;
5685         case OPTION_OBJECT:
5686             user_creatable_process_cmdline(optarg);
5687             break;
5688         case OPTION_IMAGE_OPTS:
5689             image_opts = true;
5690             break;
5691         case OPTION_OUTPUT:
5692             output_format = parse_output_format(argv[0], optarg);
5693             break;
5694         case OPTION_SIZE:
5695             img_size = cvtnum("image size", optarg);
5696             if (img_size < 0) {
5697                 goto out;
5698             }
5699             break;
5700         }
5701     }
5702 
5703     if (argc - optind > 1) {
5704         error_report("At most one filename argument is allowed.");
5705         goto out;
5706     } else if (argc - optind == 1) {
5707         filename = argv[optind];
5708     }
5709 
5710     if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
5711         error_report("--image-opts, -f, and -l require a filename argument.");
5712         goto out;
5713     }
5714     if (filename && img_size != -1) {
5715         error_report("--size N cannot be used together with a filename.");
5716         goto out;
5717     }
5718     if (!filename && img_size == -1) {
5719         error_report("Either --size N or one filename must be specified.");
5720         goto out;
5721     }
5722 
5723     if (filename) {
5724         in_blk = img_open(image_opts, filename, fmt, 0,
5725                           false, false, force_share);
5726         if (!in_blk) {
5727             goto out;
5728         }
5729 
5730         if (sn_opts) {
5731             bdrv_snapshot_load_tmp(blk_bs(in_blk),
5732                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
5733                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
5734                     &local_err);
5735         } else if (snapshot_name != NULL) {
5736             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
5737                     snapshot_name, &local_err);
5738         }
5739         if (local_err) {
5740             error_reportf_err(local_err, "Failed to load snapshot: ");
5741             goto out;
5742         }
5743     }
5744 
5745     drv = bdrv_find_format(out_fmt);
5746     if (!drv) {
5747         error_report("Unknown file format '%s'", out_fmt);
5748         goto out;
5749     }
5750     if (!drv->create_opts) {
5751         error_report("Format driver '%s' does not support image creation",
5752                      drv->format_name);
5753         goto out;
5754     }
5755 
5756     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5757     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
5758     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5759     if (options) {
5760         if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
5761             error_report_err(local_err);
5762             error_report("Invalid options for file format '%s'", out_fmt);
5763             goto out;
5764         }
5765     }
5766     if (img_size != -1) {
5767         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5768     }
5769 
5770     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
5771     if (local_err) {
5772         error_report_err(local_err);
5773         goto out;
5774     }
5775 
5776     if (output_format == OFORMAT_HUMAN) {
5777         printf("required size: %" PRIu64 "\n", info->required);
5778         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5779         if (info->has_bitmaps) {
5780             printf("bitmaps size: %" PRIu64 "\n", info->bitmaps);
5781         }
5782     } else {
5783         dump_json_block_measure_info(info);
5784     }
5785 
5786     ret = 0;
5787 
5788 out:
5789     qapi_free_BlockMeasureInfo(info);
5790     qemu_opts_del(object_opts);
5791     qemu_opts_del(opts);
5792     qemu_opts_del(sn_opts);
5793     qemu_opts_free(create_opts);
5794     g_free(options);
5795     blk_unref(in_blk);
5796     return ret;
5797 }
5798 
/*
 * Table of qemu-img subcommands.  The entries are generated by expanding
 * the DEF() macro over the contents of qemu-img-cmds.h; only the first two
 * DEF() arguments (option and callback) are used here, arg_string is
 * ignored.  The table ends with an all-NULL sentinel entry, which main()
 * relies on when searching for a command by name.
 */
static const img_cmd_t img_cmds[] = {
#define DEF(option, callback, arg_string)        \
    { option, callback },
#include "qemu-img-cmds.h"
#undef DEF
    { NULL, NULL, },
};
5806 
5807 int main(int argc, char **argv)
5808 {
5809     const img_cmd_t *cmd;
5810     const char *cmdname;
5811     int c;
5812     static const struct option long_options[] = {
5813         {"help", no_argument, 0, 'h'},
5814         {"version", no_argument, 0, 'V'},
5815         {"trace", required_argument, NULL, 'T'},
5816         {0, 0, 0, 0}
5817     };
5818 
5819 #ifdef CONFIG_POSIX
5820     signal(SIGPIPE, SIG_IGN);
5821 #endif
5822 
5823     socket_init();
5824     error_init(argv[0]);
5825     module_call_init(MODULE_INIT_TRACE);
5826     qemu_init_exec_dir(argv[0]);
5827 
5828     qemu_init_main_loop(&error_fatal);
5829 
5830     qcrypto_init(&error_fatal);
5831 
5832     module_call_init(MODULE_INIT_QOM);
5833     bdrv_init();
5834 
5835     qemu_add_opts(&qemu_source_opts);
5836     qemu_add_opts(&qemu_trace_opts);
5837 
5838     while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5839         switch (c) {
5840         case ':':
5841             missing_argument(argv[optind - 1]);
5842             return 0;
5843         case '?':
5844             unrecognized_option(argv[optind - 1]);
5845             return 0;
5846         case 'h':
5847             help();
5848             return 0;
5849         case 'V':
5850             printf(QEMU_IMG_VERSION);
5851             return 0;
5852         case 'T':
5853             trace_opt_parse(optarg);
5854             break;
5855         }
5856     }
5857 
5858     if (optind >= argc) {
5859         error_exit(argv[0], "Not enough arguments");
5860     }
5861 
5862     cmdname = argv[optind];
5863 
5864     if (!trace_init_backends()) {
5865         exit(1);
5866     }
5867     trace_init_file();
5868     qemu_set_log(LOG_TRACE, &error_fatal);
5869 
5870     /* find the command */
5871     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5872         if (!strcmp(cmdname, cmd->name)) {
5873             g_autofree char *argv0 = g_strdup_printf("%s %s", argv[0], cmdname);
5874             /* reset options and getopt processing (incl return order) */
5875             argv += optind;
5876             argc -= optind;
5877             qemu_reset_optind();
5878             argv[0] = argv0;
5879             return cmd->handler(cmd, argc, argv);
5880         }
5881     }
5882 
5883     /* not found */
5884     error_exit(argv[0], "Command not found: %s", cmdname);
5885 }
5886