xref: /openbmc/qemu/qemu-img.c (revision e0c72452)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu/help-texts.h"
29 #include "qemu/qemu-progress.h"
30 #include "qemu-version.h"
31 #include "qapi/error.h"
32 #include "qapi/qapi-commands-block-core.h"
33 #include "qapi/qapi-visit-block-core.h"
34 #include "qapi/qobject-output-visitor.h"
35 #include "qapi/qmp/qjson.h"
36 #include "qapi/qmp/qdict.h"
37 #include "qemu/cutils.h"
38 #include "qemu/config-file.h"
39 #include "qemu/option.h"
40 #include "qemu/error-report.h"
41 #include "qemu/log.h"
42 #include "qemu/main-loop.h"
43 #include "qemu/module.h"
44 #include "qemu/sockets.h"
45 #include "qemu/units.h"
46 #include "qemu/memalign.h"
47 #include "qom/object_interfaces.h"
48 #include "sysemu/block-backend.h"
49 #include "block/block_int.h"
50 #include "block/blockjob.h"
51 #include "block/dirty-bitmap.h"
52 #include "block/qapi.h"
53 #include "crypto/init.h"
54 #include "trace/control.h"
55 #include "qemu/throttle.h"
56 #include "block/throttle-groups.h"
57 
/* Version banner placed at the start of the help text, followed by the copyright line. */
#define QEMU_IMG_VERSION \
    "qemu-img version " QEMU_FULL_VERSION "\n" \
    QEMU_COPYRIGHT "\n"
60 
/*
 * Pairs a command name with the function implementing it.  The handler
 * takes an argc/argv pair and returns an int status.
 */
typedef struct img_cmd_t {
    const char *name;
    int (*handler)(int argc, char **argv);
} img_cmd_t;
65 
/*
 * Codes returned by getopt_long() for options that only have a long form.
 * Numbering starts at 256 (presumably to stay clear of single-character
 * option codes); every following enumerator is one greater than the
 * previous one, ending with OPTION_SKIP_BROKEN == 277.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN,
    OPTION_OBJECT,
    OPTION_IMAGE_OPTS,
    OPTION_PATTERN,
    OPTION_FLUSH_INTERVAL,
    OPTION_NO_DRAIN,
    OPTION_TARGET_IMAGE_OPTS,
    OPTION_SIZE,
    OPTION_PREALLOCATION,
    OPTION_SHRINK,
    OPTION_SALVAGE,
    OPTION_TARGET_IS_ZERO,
    OPTION_ADD,
    OPTION_REMOVE,
    OPTION_CLEAR,
    OPTION_ENABLE,
    OPTION_DISABLE,
    OPTION_MERGE,
    OPTION_BITMAPS,
    OPTION_FORCE,
    OPTION_SKIP_BROKEN,
};
90 
/* Output styles selectable with the '--output' option ("json" or "human"). */
typedef enum OutputFormat {
    OFORMAT_JSON = 0,
    OFORMAT_HUMAN = 1,
} OutputFormat;
95 
/*
 * Cache mode used when none is given on the command line.  Data integrity
 * is not important for qemu-img, so "writeback" is the default.
 */
#define BDRV_DEFAULT_CACHE "writeback"
98 
/*
 * Callback handed to bdrv_iterate_format(): writes @name to stdout,
 * preceded by one space.  @opaque is not used.
 */
static void format_print(void *opaque, const char *name)
{
    (void)opaque;
    printf(" %s", name);
}
103 
104 static G_NORETURN G_GNUC_PRINTF(1, 2)
105 void error_exit(const char *fmt, ...)
106 {
107     va_list ap;
108 
109     va_start(ap, fmt);
110     error_vreport(fmt, ap);
111     va_end(ap);
112 
113     error_printf("Try 'qemu-img --help' for more information\n");
114     exit(EXIT_FAILURE);
115 }
116 
117 static G_NORETURN
118 void missing_argument(const char *option)
119 {
120     error_exit("missing argument for option '%s'", option);
121 }
122 
123 static G_NORETURN
124 void unrecognized_option(const char *option)
125 {
126     error_exit("unrecognized option '%s'", option);
127 }
128 
129 /* Please keep in synch with docs/tools/qemu-img.rst */
130 static G_NORETURN
131 void help(void)
132 {
133     const char *help_msg =
134            QEMU_IMG_VERSION
135            "usage: qemu-img [standard options] command [command options]\n"
136            "QEMU disk image utility\n"
137            "\n"
138            "    '-h', '--help'       display this help and exit\n"
139            "    '-V', '--version'    output version information and exit\n"
140            "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
141            "                         specify tracing options\n"
142            "\n"
143            "Command syntax:\n"
144 #define DEF(option, callback, arg_string)        \
145            "  " arg_string "\n"
146 #include "qemu-img-cmds.h"
147 #undef DEF
148            "\n"
149            "Command parameters:\n"
150            "  'filename' is a disk image filename\n"
151            "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
152            "    manual page for a description of the object properties. The most common\n"
153            "    object type is a 'secret', which is used to supply passwords and/or\n"
154            "    encryption keys.\n"
155            "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
156            "  'cache' is the cache mode used to write the output disk image, the valid\n"
157            "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
158            "    'directsync' and 'unsafe' (default for convert)\n"
159            "  'src_cache' is the cache mode used to read input disk images, the valid\n"
160            "    options are the same as for the 'cache' option\n"
161            "  'size' is the disk image size in bytes. Optional suffixes\n"
162            "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
163            "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
164            "    supported. 'b' is ignored.\n"
165            "  'output_filename' is the destination disk image filename\n"
166            "  'output_fmt' is the destination format\n"
167            "  'options' is a comma separated list of format specific options in a\n"
168            "    name=value format. Use -o help for an overview of the options supported by\n"
169            "    the used format\n"
170            "  'snapshot_param' is param used for internal snapshot, format\n"
171            "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
172            "    '[ID_OR_NAME]'\n"
173            "  '-c' indicates that target image must be compressed (qcow format only)\n"
174            "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
175            "       new backing file match exactly. The image doesn't need a working\n"
176            "       backing file before rebasing in this case (useful for renaming the\n"
177            "       backing file). For image creation, allow creating without attempting\n"
178            "       to open the backing file.\n"
179            "  '-h' with or without a command shows this help and lists the supported formats\n"
180            "  '-p' show progress of command (only certain commands)\n"
181            "  '-q' use Quiet mode - do not print any output (except errors)\n"
182            "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
183            "       contain only zeros for qemu-img to create a sparse image during\n"
184            "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
185            "       unallocated or zero sectors, and the destination image will always be\n"
186            "       fully allocated\n"
187            "  '--output' takes the format in which the output must be done (human or json)\n"
188            "  '-n' skips the target volume creation (useful if the volume is created\n"
189            "       prior to running qemu-img)\n"
190            "\n"
191            "Parameters to bitmap subcommand:\n"
192            "  'bitmap' is the name of the bitmap to manipulate, through one or more\n"
193            "       actions from '--add', '--remove', '--clear', '--enable', '--disable',\n"
194            "       or '--merge source'\n"
195            "  '-g granularity' sets the granularity for '--add' actions\n"
196            "  '-b source' and '-F src_fmt' tell '--merge' actions to find the source\n"
197            "       bitmaps from an alternative file\n"
198            "\n"
199            "Parameters to check subcommand:\n"
200            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
201            "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
202            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
203            "       hiding corruption that has already occurred.\n"
204            "\n"
205            "Parameters to convert subcommand:\n"
206            "  '--bitmaps' copies all top-level persistent bitmaps to destination\n"
207            "  '-m' specifies how many coroutines work in parallel during the convert\n"
208            "       process (defaults to 8)\n"
209            "  '-W' allow to write to the target out of order rather than sequential\n"
210            "\n"
211            "Parameters to snapshot subcommand:\n"
212            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
213            "  '-a' applies a snapshot (revert disk to saved state)\n"
214            "  '-c' creates a snapshot\n"
215            "  '-d' deletes a snapshot\n"
216            "  '-l' lists all snapshots in the given image\n"
217            "\n"
218            "Parameters to compare subcommand:\n"
219            "  '-f' first image format\n"
220            "  '-F' second image format\n"
221            "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
222            "\n"
223            "Parameters to dd subcommand:\n"
224            "  'bs=BYTES' read and write up to BYTES bytes at a time "
225            "(default: 512)\n"
226            "  'count=N' copy only N input blocks\n"
227            "  'if=FILE' read from FILE\n"
228            "  'of=FILE' write to FILE\n"
229            "  'skip=N' skip N bs-sized blocks at the start of input\n";
230 
231     printf("%s\nSupported formats:", help_msg);
232     bdrv_iterate_format(format_print, NULL, false);
233     printf("\n\n" QEMU_HELP_BOTTOM "\n");
234     exit(EXIT_SUCCESS);
235 }
236 
/*
 * Can @list be passed to accumulate_options()?
 *
 * Several lists get concatenated with ',' as separator, and ",," is an
 * escaped comma.  @list is therefore acceptable only if it
 *  - is not empty (otherwise the separator before it could escape the
 *    separator after it),
 *  - does not begin with ',' (otherwise the separator before it would be
 *    escaped), and
 *  - ends with an even number of ',' (otherwise the separator after it
 *    would be escaped).
 */
static bool is_valid_option_list(const char *list)
{
    size_t total = strlen(list);
    size_t trailing_commas = 0;

    if (total == 0 || *list == ',') {
        return false;
    }

    /* The first character is not ',', so this stops before index 0. */
    while (list[total - 1 - trailing_commas] == ',') {
        trailing_commas++;
    }

    return trailing_commas % 2 == 0;
}
264 
/*
 * Append @list to the comma-separated option string *@options.
 *
 * *@options may be NULL, in which case it becomes a copy of @list;
 * otherwise it is replaced by a newly allocated "<old>,<list>" string and
 * the old string is freed.
 *
 * Returns 0 on success, or -1 (after reporting an error, leaving *@options
 * untouched) when @list fails is_valid_option_list().
 */
static int accumulate_options(char **options, char *list)
{
    char *joined;

    if (!is_valid_option_list(list)) {
        error_report("Invalid option list: %s", list);
        return -1;
    }

    joined = *options ? g_strdup_printf("%s,%s", *options, list)
                      : g_strdup(list);
    g_free(*options);   /* no-op when there was no previous string */
    *options = joined;
    return 0;
}
283 
284 static QemuOptsList qemu_source_opts = {
285     .name = "source",
286     .implied_opt_name = "file",
287     .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
288     .desc = {
289         { }
290     },
291 };
292 
293 static int G_GNUC_PRINTF(2, 3) qprintf(bool quiet, const char *fmt, ...)
294 {
295     int ret = 0;
296     if (!quiet) {
297         va_list args;
298         va_start(args, fmt);
299         ret = vprintf(fmt, args);
300         va_end(args);
301     }
302     return ret;
303 }
304 
305 
306 static int print_block_option_help(const char *filename, const char *fmt)
307 {
308     BlockDriver *drv, *proto_drv;
309     QemuOptsList *create_opts = NULL;
310     Error *local_err = NULL;
311 
312     /* Find driver and parse its options */
313     drv = bdrv_find_format(fmt);
314     if (!drv) {
315         error_report("Unknown file format '%s'", fmt);
316         return 1;
317     }
318 
319     if (!drv->create_opts) {
320         error_report("Format driver '%s' does not support image creation", fmt);
321         return 1;
322     }
323 
324     create_opts = qemu_opts_append(create_opts, drv->create_opts);
325     if (filename) {
326         proto_drv = bdrv_find_protocol(filename, true, &local_err);
327         if (!proto_drv) {
328             error_report_err(local_err);
329             qemu_opts_free(create_opts);
330             return 1;
331         }
332         if (!proto_drv->create_opts) {
333             error_report("Protocol driver '%s' does not support image creation",
334                          proto_drv->format_name);
335             qemu_opts_free(create_opts);
336             return 1;
337         }
338         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
339     }
340 
341     if (filename) {
342         printf("Supported options:\n");
343     } else {
344         printf("Supported %s options:\n", fmt);
345     }
346     qemu_opts_print_help(create_opts, false);
347     qemu_opts_free(create_opts);
348 
349     if (!filename) {
350         printf("\n"
351                "The protocol level may support further options.\n"
352                "Specify the target filename to include those options.\n");
353     }
354 
355     return 0;
356 }
357 
358 
359 static BlockBackend *img_open_opts(const char *optstr,
360                                    QemuOpts *opts, int flags, bool writethrough,
361                                    bool quiet, bool force_share)
362 {
363     QDict *options;
364     Error *local_err = NULL;
365     BlockBackend *blk;
366     options = qemu_opts_to_qdict(opts, NULL);
367     if (force_share) {
368         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
369             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
370             error_report("--force-share/-U conflicts with image options");
371             qobject_unref(options);
372             return NULL;
373         }
374         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
375     }
376     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
377     if (!blk) {
378         error_reportf_err(local_err, "Could not open '%s': ", optstr);
379         return NULL;
380     }
381     blk_set_enable_write_cache(blk, !writethrough);
382 
383     return blk;
384 }
385 
386 static BlockBackend *img_open_file(const char *filename,
387                                    QDict *options,
388                                    const char *fmt, int flags,
389                                    bool writethrough, bool quiet,
390                                    bool force_share)
391 {
392     BlockBackend *blk;
393     Error *local_err = NULL;
394 
395     if (!options) {
396         options = qdict_new();
397     }
398     if (fmt) {
399         qdict_put_str(options, "driver", fmt);
400     }
401 
402     if (force_share) {
403         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
404     }
405     blk = blk_new_open(filename, NULL, options, flags, &local_err);
406     if (!blk) {
407         error_reportf_err(local_err, "Could not open '%s': ", filename);
408         return NULL;
409     }
410     blk_set_enable_write_cache(blk, !writethrough);
411 
412     return blk;
413 }
414 
415 
416 static int img_add_key_secrets(void *opaque,
417                                const char *name, const char *value,
418                                Error **errp)
419 {
420     QDict *options = opaque;
421 
422     if (g_str_has_suffix(name, "key-secret")) {
423         qdict_put_str(options, name, value);
424     }
425 
426     return 0;
427 }
428 
429 
430 static BlockBackend *img_open(bool image_opts,
431                               const char *filename,
432                               const char *fmt, int flags, bool writethrough,
433                               bool quiet, bool force_share)
434 {
435     BlockBackend *blk;
436     if (image_opts) {
437         QemuOpts *opts;
438         if (fmt) {
439             error_report("--image-opts and --format are mutually exclusive");
440             return NULL;
441         }
442         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
443                                        filename, true);
444         if (!opts) {
445             return NULL;
446         }
447         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
448                             force_share);
449     } else {
450         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
451                             force_share);
452     }
453 
454     if (blk) {
455         blk_set_force_allow_inactivate(blk);
456     }
457 
458     return blk;
459 }
460 
461 
462 static int add_old_style_options(const char *fmt, QemuOpts *opts,
463                                  const char *base_filename,
464                                  const char *base_fmt)
465 {
466     if (base_filename) {
467         if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
468                           NULL)) {
469             error_report("Backing file not supported for file format '%s'",
470                          fmt);
471             return -1;
472         }
473     }
474     if (base_fmt) {
475         if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
476             error_report("Backing file format not supported for file "
477                          "format '%s'", fmt);
478             return -1;
479         }
480     }
481     return 0;
482 }
483 
484 static int64_t cvtnum_full(const char *name, const char *value, int64_t min,
485                            int64_t max)
486 {
487     int err;
488     uint64_t res;
489 
490     err = qemu_strtosz(value, NULL, &res);
491     if (err < 0 && err != -ERANGE) {
492         error_report("Invalid %s specified. You may use "
493                      "k, M, G, T, P or E suffixes for", name);
494         error_report("kilobytes, megabytes, gigabytes, terabytes, "
495                      "petabytes and exabytes.");
496         return err;
497     }
498     if (err == -ERANGE || res > max || res < min) {
499         error_report("Invalid %s specified. Must be between %" PRId64
500                      " and %" PRId64 ".", name, min, max);
501         return -ERANGE;
502     }
503     return res;
504 }
505 
/*
 * Parse the size string @value, accepting anything from 0 to INT64_MAX.
 * See cvtnum_full() for the return value and error reporting.
 */
static int64_t cvtnum(const char *name, const char *value)
{
    const int64_t lowest = 0;
    const int64_t highest = INT64_MAX;

    return cvtnum_full(name, value, lowest, highest);
}
510 
511 static int img_create(int argc, char **argv)
512 {
513     int c;
514     uint64_t img_size = -1;
515     const char *fmt = "raw";
516     const char *base_fmt = NULL;
517     const char *filename;
518     const char *base_filename = NULL;
519     char *options = NULL;
520     Error *local_err = NULL;
521     bool quiet = false;
522     int flags = 0;
523 
524     for(;;) {
525         static const struct option long_options[] = {
526             {"help", no_argument, 0, 'h'},
527             {"object", required_argument, 0, OPTION_OBJECT},
528             {0, 0, 0, 0}
529         };
530         c = getopt_long(argc, argv, ":F:b:f:ho:qu",
531                         long_options, NULL);
532         if (c == -1) {
533             break;
534         }
535         switch(c) {
536         case ':':
537             missing_argument(argv[optind - 1]);
538             break;
539         case '?':
540             unrecognized_option(argv[optind - 1]);
541             break;
542         case 'h':
543             help();
544             break;
545         case 'F':
546             base_fmt = optarg;
547             break;
548         case 'b':
549             base_filename = optarg;
550             break;
551         case 'f':
552             fmt = optarg;
553             break;
554         case 'o':
555             if (accumulate_options(&options, optarg) < 0) {
556                 goto fail;
557             }
558             break;
559         case 'q':
560             quiet = true;
561             break;
562         case 'u':
563             flags |= BDRV_O_NO_BACKING;
564             break;
565         case OPTION_OBJECT:
566             user_creatable_process_cmdline(optarg);
567             break;
568         }
569     }
570 
571     /* Get the filename */
572     filename = (optind < argc) ? argv[optind] : NULL;
573     if (options && has_help_option(options)) {
574         g_free(options);
575         return print_block_option_help(filename, fmt);
576     }
577 
578     if (optind >= argc) {
579         error_exit("Expecting image file name");
580     }
581     optind++;
582 
583     /* Get image size, if specified */
584     if (optind < argc) {
585         int64_t sval;
586 
587         sval = cvtnum("image size", argv[optind++]);
588         if (sval < 0) {
589             goto fail;
590         }
591         img_size = (uint64_t)sval;
592     }
593     if (optind != argc) {
594         error_exit("Unexpected argument: %s", argv[optind]);
595     }
596 
597     bdrv_img_create(filename, fmt, base_filename, base_fmt,
598                     options, img_size, flags, quiet, &local_err);
599     if (local_err) {
600         error_reportf_err(local_err, "%s: ", filename);
601         goto fail;
602     }
603 
604     g_free(options);
605     return 0;
606 
607 fail:
608     g_free(options);
609     return 1;
610 }
611 
612 static void dump_json_image_check(ImageCheck *check, bool quiet)
613 {
614     GString *str;
615     QObject *obj;
616     Visitor *v = qobject_output_visitor_new(&obj);
617 
618     visit_type_ImageCheck(v, NULL, &check, &error_abort);
619     visit_complete(v, &obj);
620     str = qobject_to_json_pretty(obj, true);
621     assert(str != NULL);
622     qprintf(quiet, "%s\n", str->str);
623     qobject_unref(obj);
624     visit_free(v);
625     g_string_free(str, true);
626 }
627 
628 static void dump_human_image_check(ImageCheck *check, bool quiet)
629 {
630     if (!(check->corruptions || check->leaks || check->check_errors)) {
631         qprintf(quiet, "No errors were found on the image.\n");
632     } else {
633         if (check->corruptions) {
634             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
635                     "Data may be corrupted, or further writes to the image "
636                     "may corrupt it.\n",
637                     check->corruptions);
638         }
639 
640         if (check->leaks) {
641             qprintf(quiet,
642                     "\n%" PRId64 " leaked clusters were found on the image.\n"
643                     "This means waste of disk space, but no harm to data.\n",
644                     check->leaks);
645         }
646 
647         if (check->check_errors) {
648             qprintf(quiet,
649                     "\n%" PRId64
650                     " internal errors have occurred during the check.\n",
651                     check->check_errors);
652         }
653     }
654 
655     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
656         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
657                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
658                 check->allocated_clusters, check->total_clusters,
659                 check->allocated_clusters * 100.0 / check->total_clusters,
660                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
661                 check->compressed_clusters * 100.0 /
662                 check->allocated_clusters);
663     }
664 
665     if (check->image_end_offset) {
666         qprintf(quiet,
667                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
668     }
669 }
670 
671 static int collect_image_check(BlockDriverState *bs,
672                    ImageCheck *check,
673                    const char *filename,
674                    const char *fmt,
675                    int fix)
676 {
677     int ret;
678     BdrvCheckResult result;
679 
680     ret = bdrv_check(bs, &result, fix);
681     if (ret < 0) {
682         return ret;
683     }
684 
685     check->filename                 = g_strdup(filename);
686     check->format                   = g_strdup(bdrv_get_format_name(bs));
687     check->check_errors             = result.check_errors;
688     check->corruptions              = result.corruptions;
689     check->has_corruptions          = result.corruptions != 0;
690     check->leaks                    = result.leaks;
691     check->has_leaks                = result.leaks != 0;
692     check->corruptions_fixed        = result.corruptions_fixed;
693     check->has_corruptions_fixed    = result.corruptions_fixed != 0;
694     check->leaks_fixed              = result.leaks_fixed;
695     check->has_leaks_fixed          = result.leaks_fixed != 0;
696     check->image_end_offset         = result.image_end_offset;
697     check->has_image_end_offset     = result.image_end_offset != 0;
698     check->total_clusters           = result.bfi.total_clusters;
699     check->has_total_clusters       = result.bfi.total_clusters != 0;
700     check->allocated_clusters       = result.bfi.allocated_clusters;
701     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
702     check->fragmented_clusters      = result.bfi.fragmented_clusters;
703     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
704     check->compressed_clusters      = result.bfi.compressed_clusters;
705     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
706 
707     return 0;
708 }
709 
710 /*
711  * Checks an image for consistency. Exit codes:
712  *
713  *  0 - Check completed, image is good
714  *  1 - Check not completed because of internal errors
715  *  2 - Check completed, image is corrupted
716  *  3 - Check completed, image has leaked clusters, but is good otherwise
717  * 63 - Checks are not supported by the image format
718  */
719 static int img_check(int argc, char **argv)
720 {
721     int c, ret;
722     OutputFormat output_format = OFORMAT_HUMAN;
723     const char *filename, *fmt, *output, *cache;
724     BlockBackend *blk;
725     BlockDriverState *bs;
726     int fix = 0;
727     int flags = BDRV_O_CHECK;
728     bool writethrough;
729     ImageCheck *check;
730     bool quiet = false;
731     bool image_opts = false;
732     bool force_share = false;
733 
734     fmt = NULL;
735     output = NULL;
736     cache = BDRV_DEFAULT_CACHE;
737 
738     for(;;) {
739         int option_index = 0;
740         static const struct option long_options[] = {
741             {"help", no_argument, 0, 'h'},
742             {"format", required_argument, 0, 'f'},
743             {"repair", required_argument, 0, 'r'},
744             {"output", required_argument, 0, OPTION_OUTPUT},
745             {"object", required_argument, 0, OPTION_OBJECT},
746             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
747             {"force-share", no_argument, 0, 'U'},
748             {0, 0, 0, 0}
749         };
750         c = getopt_long(argc, argv, ":hf:r:T:qU",
751                         long_options, &option_index);
752         if (c == -1) {
753             break;
754         }
755         switch(c) {
756         case ':':
757             missing_argument(argv[optind - 1]);
758             break;
759         case '?':
760             unrecognized_option(argv[optind - 1]);
761             break;
762         case 'h':
763             help();
764             break;
765         case 'f':
766             fmt = optarg;
767             break;
768         case 'r':
769             flags |= BDRV_O_RDWR;
770 
771             if (!strcmp(optarg, "leaks")) {
772                 fix = BDRV_FIX_LEAKS;
773             } else if (!strcmp(optarg, "all")) {
774                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
775             } else {
776                 error_exit("Unknown option value for -r "
777                            "(expecting 'leaks' or 'all'): %s", optarg);
778             }
779             break;
780         case OPTION_OUTPUT:
781             output = optarg;
782             break;
783         case 'T':
784             cache = optarg;
785             break;
786         case 'q':
787             quiet = true;
788             break;
789         case 'U':
790             force_share = true;
791             break;
792         case OPTION_OBJECT:
793             user_creatable_process_cmdline(optarg);
794             break;
795         case OPTION_IMAGE_OPTS:
796             image_opts = true;
797             break;
798         }
799     }
800     if (optind != argc - 1) {
801         error_exit("Expecting one image file name");
802     }
803     filename = argv[optind++];
804 
805     if (output && !strcmp(output, "json")) {
806         output_format = OFORMAT_JSON;
807     } else if (output && !strcmp(output, "human")) {
808         output_format = OFORMAT_HUMAN;
809     } else if (output) {
810         error_report("--output must be used with human or json as argument.");
811         return 1;
812     }
813 
814     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
815     if (ret < 0) {
816         error_report("Invalid source cache option: %s", cache);
817         return 1;
818     }
819 
820     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
821                    force_share);
822     if (!blk) {
823         return 1;
824     }
825     bs = blk_bs(blk);
826 
827     check = g_new0(ImageCheck, 1);
828     ret = collect_image_check(bs, check, filename, fmt, fix);
829 
830     if (ret == -ENOTSUP) {
831         error_report("This image format does not support checks");
832         ret = 63;
833         goto fail;
834     }
835 
836     if (check->corruptions_fixed || check->leaks_fixed) {
837         int corruptions_fixed, leaks_fixed;
838         bool has_leaks_fixed, has_corruptions_fixed;
839 
840         leaks_fixed         = check->leaks_fixed;
841         has_leaks_fixed     = check->has_leaks_fixed;
842         corruptions_fixed   = check->corruptions_fixed;
843         has_corruptions_fixed = check->has_corruptions_fixed;
844 
845         if (output_format == OFORMAT_HUMAN) {
846             qprintf(quiet,
847                     "The following inconsistencies were found and repaired:\n\n"
848                     "    %" PRId64 " leaked clusters\n"
849                     "    %" PRId64 " corruptions\n\n"
850                     "Double checking the fixed image now...\n",
851                     check->leaks_fixed,
852                     check->corruptions_fixed);
853         }
854 
855         qapi_free_ImageCheck(check);
856         check = g_new0(ImageCheck, 1);
857         ret = collect_image_check(bs, check, filename, fmt, 0);
858 
859         check->leaks_fixed          = leaks_fixed;
860         check->has_leaks_fixed      = has_leaks_fixed;
861         check->corruptions_fixed    = corruptions_fixed;
862         check->has_corruptions_fixed = has_corruptions_fixed;
863     }
864 
865     if (!ret) {
866         switch (output_format) {
867         case OFORMAT_HUMAN:
868             dump_human_image_check(check, quiet);
869             break;
870         case OFORMAT_JSON:
871             dump_json_image_check(check, quiet);
872             break;
873         }
874     }
875 
876     if (ret || check->check_errors) {
877         if (ret) {
878             error_report("Check failed: %s", strerror(-ret));
879         } else {
880             error_report("Check failed");
881         }
882         ret = 1;
883         goto fail;
884     }
885 
886     if (check->corruptions) {
887         ret = 2;
888     } else if (check->leaks) {
889         ret = 3;
890     } else {
891         ret = 0;
892     }
893 
894 fail:
895     qapi_free_ImageCheck(check);
896     blk_unref(blk);
897     return ret;
898 }
899 
900 typedef struct CommonBlockJobCBInfo {
901     BlockDriverState *bs;
902     Error **errp;
903 } CommonBlockJobCBInfo;
904 
905 static void common_block_job_cb(void *opaque, int ret)
906 {
907     CommonBlockJobCBInfo *cbi = opaque;
908 
909     if (ret < 0) {
910         error_setg_errno(cbi->errp, -ret, "Block job failed");
911     }
912 }
913 
914 static void run_block_job(BlockJob *job, Error **errp)
915 {
916     uint64_t progress_current, progress_total;
917     AioContext *aio_context = block_job_get_aio_context(job);
918     int ret = 0;
919 
920     job_lock();
921     job_ref_locked(&job->job);
922     do {
923         float progress = 0.0f;
924         job_unlock();
925         aio_poll(aio_context, true);
926 
927         progress_get_snapshot(&job->job.progress, &progress_current,
928                               &progress_total);
929         if (progress_total) {
930             progress = (float)progress_current / progress_total * 100.f;
931         }
932         qemu_progress_print(progress, 0);
933         job_lock();
934     } while (!job_is_ready_locked(&job->job) &&
935              !job_is_completed_locked(&job->job));
936 
937     if (!job_is_completed_locked(&job->job)) {
938         ret = job_complete_sync_locked(&job->job, errp);
939     } else {
940         ret = job->job.ret;
941     }
942     job_unref_locked(&job->job);
943     job_unlock();
944 
945     /* publish completion progress only when success */
946     if (!ret) {
947         qemu_progress_print(100.f, 0);
948     }
949 }
950 
951 static int img_commit(int argc, char **argv)
952 {
953     int c, ret, flags;
954     const char *filename, *fmt, *cache, *base;
955     BlockBackend *blk;
956     BlockDriverState *bs, *base_bs;
957     BlockJob *job;
958     bool progress = false, quiet = false, drop = false;
959     bool writethrough;
960     Error *local_err = NULL;
961     CommonBlockJobCBInfo cbi;
962     bool image_opts = false;
963     AioContext *aio_context;
964     int64_t rate_limit = 0;
965 
966     fmt = NULL;
967     cache = BDRV_DEFAULT_CACHE;
968     base = NULL;
969     for(;;) {
970         static const struct option long_options[] = {
971             {"help", no_argument, 0, 'h'},
972             {"object", required_argument, 0, OPTION_OBJECT},
973             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
974             {0, 0, 0, 0}
975         };
976         c = getopt_long(argc, argv, ":f:ht:b:dpqr:",
977                         long_options, NULL);
978         if (c == -1) {
979             break;
980         }
981         switch(c) {
982         case ':':
983             missing_argument(argv[optind - 1]);
984             break;
985         case '?':
986             unrecognized_option(argv[optind - 1]);
987             break;
988         case 'h':
989             help();
990             break;
991         case 'f':
992             fmt = optarg;
993             break;
994         case 't':
995             cache = optarg;
996             break;
997         case 'b':
998             base = optarg;
999             /* -b implies -d */
1000             drop = true;
1001             break;
1002         case 'd':
1003             drop = true;
1004             break;
1005         case 'p':
1006             progress = true;
1007             break;
1008         case 'q':
1009             quiet = true;
1010             break;
1011         case 'r':
1012             rate_limit = cvtnum("rate limit", optarg);
1013             if (rate_limit < 0) {
1014                 return 1;
1015             }
1016             break;
1017         case OPTION_OBJECT:
1018             user_creatable_process_cmdline(optarg);
1019             break;
1020         case OPTION_IMAGE_OPTS:
1021             image_opts = true;
1022             break;
1023         }
1024     }
1025 
1026     /* Progress is not shown in Quiet mode */
1027     if (quiet) {
1028         progress = false;
1029     }
1030 
1031     if (optind != argc - 1) {
1032         error_exit("Expecting one image file name");
1033     }
1034     filename = argv[optind++];
1035 
1036     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
1037     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1038     if (ret < 0) {
1039         error_report("Invalid cache option: %s", cache);
1040         return 1;
1041     }
1042 
1043     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1044                    false);
1045     if (!blk) {
1046         return 1;
1047     }
1048     bs = blk_bs(blk);
1049 
1050     qemu_progress_init(progress, 1.f);
1051     qemu_progress_print(0.f, 100);
1052 
1053     if (base) {
1054         base_bs = bdrv_find_backing_image(bs, base);
1055         if (!base_bs) {
1056             error_setg(&local_err,
1057                        "Did not find '%s' in the backing chain of '%s'",
1058                        base, filename);
1059             goto done;
1060         }
1061     } else {
1062         /* This is different from QMP, which by default uses the deepest file in
1063          * the backing chain (i.e., the very base); however, the traditional
1064          * behavior of qemu-img commit is using the immediate backing file. */
1065         base_bs = bdrv_backing_chain_next(bs);
1066         if (!base_bs) {
1067             error_setg(&local_err, "Image does not have a backing file");
1068             goto done;
1069         }
1070     }
1071 
1072     cbi = (CommonBlockJobCBInfo){
1073         .errp = &local_err,
1074         .bs   = bs,
1075     };
1076 
1077     aio_context = bdrv_get_aio_context(bs);
1078     aio_context_acquire(aio_context);
1079     commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit,
1080                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1081                         &cbi, false, &local_err);
1082     aio_context_release(aio_context);
1083     if (local_err) {
1084         goto done;
1085     }
1086 
1087     /* When the block job completes, the BlockBackend reference will point to
1088      * the old backing file. In order to avoid that the top image is already
1089      * deleted, so we can still empty it afterwards, increment the reference
1090      * counter here preemptively. */
1091     if (!drop) {
1092         bdrv_ref(bs);
1093     }
1094 
1095     job = block_job_get("commit");
1096     assert(job);
1097     run_block_job(job, &local_err);
1098     if (local_err) {
1099         goto unref_backing;
1100     }
1101 
1102     if (!drop) {
1103         BlockBackend *old_backing_blk;
1104 
1105         old_backing_blk = blk_new_with_bs(bs, BLK_PERM_WRITE, BLK_PERM_ALL,
1106                                           &local_err);
1107         if (!old_backing_blk) {
1108             goto unref_backing;
1109         }
1110         ret = blk_make_empty(old_backing_blk, &local_err);
1111         blk_unref(old_backing_blk);
1112         if (ret == -ENOTSUP) {
1113             error_free(local_err);
1114             local_err = NULL;
1115         } else if (ret < 0) {
1116             goto unref_backing;
1117         }
1118     }
1119 
1120 unref_backing:
1121     if (!drop) {
1122         bdrv_unref(bs);
1123     }
1124 
1125 done:
1126     qemu_progress_end();
1127 
1128     /*
1129      * Manually inactivate the image first because this way we can know whether
1130      * an error occurred. blk_unref() doesn't tell us about failures.
1131      */
1132     ret = bdrv_inactivate_all();
1133     if (ret < 0 && !local_err) {
1134         error_setg_errno(&local_err, -ret, "Error while closing the image");
1135     }
1136     blk_unref(blk);
1137 
1138     if (local_err) {
1139         error_report_err(local_err);
1140         return 1;
1141     }
1142 
1143     qprintf(quiet, "Image committed.\n");
1144     return 0;
1145 }
1146 
1147 /*
1148  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1149  * of the first sector boundary within buf where the sector contains a
1150  * non-zero byte.  This function is robust to a buffer that is not
1151  * sector-aligned.
1152  */
1153 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1154 {
1155     int64_t i;
1156     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1157 
1158     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1159         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1160             return i;
1161         }
1162     }
1163     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1164         return i;
1165     }
1166     return -1;
1167 }
1168 
1169 /*
1170  * Returns true iff the first sector pointed to by 'buf' contains at least
1171  * a non-NUL byte.
1172  *
1173  * 'pnum' is set to the number of sectors (including and immediately following
1174  * the first one) that are known to be in the same allocated/unallocated state.
1175  * The function will try to align the end offset to alignment boundaries so
1176  * that the request will at least end aligned and consecutive requests will
1177  * also start at an aligned offset.
1178  */
1179 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1180                                 int64_t sector_num, int alignment)
1181 {
1182     bool is_zero;
1183     int i, tail;
1184 
1185     if (n <= 0) {
1186         *pnum = 0;
1187         return 0;
1188     }
1189     is_zero = buffer_is_zero(buf, BDRV_SECTOR_SIZE);
1190     for(i = 1; i < n; i++) {
1191         buf += BDRV_SECTOR_SIZE;
1192         if (is_zero != buffer_is_zero(buf, BDRV_SECTOR_SIZE)) {
1193             break;
1194         }
1195     }
1196 
1197     if (i == n) {
1198         /*
1199          * The whole buf is the same.
1200          * No reason to split it into chunks, so return now.
1201          */
1202         *pnum = i;
1203         return !is_zero;
1204     }
1205 
1206     tail = (sector_num + i) & (alignment - 1);
1207     if (tail) {
1208         if (is_zero && i <= tail) {
1209             /*
1210              * For sure next sector after i is data, and it will rewrite this
1211              * tail anyway due to RMW. So, let's just write data now.
1212              */
1213             is_zero = false;
1214         }
1215         if (!is_zero) {
1216             /* If possible, align up end offset of allocated areas. */
1217             i += alignment - tail;
1218             i = MIN(i, n);
1219         } else {
1220             /*
1221              * For sure next sector after i is data, and it will rewrite this
1222              * tail anyway due to RMW. Better is avoid RMW and write zeroes up
1223              * to aligned bound.
1224              */
1225             i -= tail;
1226         }
1227     }
1228     *pnum = i;
1229     return !is_zero;
1230 }
1231 
1232 /*
1233  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1234  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1235  * breaking up write requests for only small sparse areas.
1236  */
1237 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1238     int min, int64_t sector_num, int alignment)
1239 {
1240     int ret;
1241     int num_checked, num_used;
1242 
1243     if (n < min) {
1244         min = n;
1245     }
1246 
1247     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1248     if (!ret) {
1249         return ret;
1250     }
1251 
1252     num_used = *pnum;
1253     buf += BDRV_SECTOR_SIZE * *pnum;
1254     n -= *pnum;
1255     sector_num += *pnum;
1256     num_checked = num_used;
1257 
1258     while (n > 0) {
1259         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1260 
1261         buf += BDRV_SECTOR_SIZE * *pnum;
1262         n -= *pnum;
1263         sector_num += *pnum;
1264         num_checked += *pnum;
1265         if (ret) {
1266             num_used = num_checked;
1267         } else if (*pnum >= min) {
1268             break;
1269         }
1270     }
1271 
1272     *pnum = num_used;
1273     return 1;
1274 }
1275 
1276 /*
1277  * Compares two buffers sector by sector. Returns 0 if the first
1278  * sector of each buffer matches, non-zero otherwise.
1279  *
1280  * pnum is set to the sector-aligned size of the buffer prefix that
1281  * has the same matching status as the first sector.
1282  */
1283 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1284                            int64_t bytes, int64_t *pnum)
1285 {
1286     bool res;
1287     int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
1288 
1289     assert(bytes > 0);
1290 
1291     res = !!memcmp(buf1, buf2, i);
1292     while (i < bytes) {
1293         int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
1294 
1295         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1296             break;
1297         }
1298         i += len;
1299     }
1300 
1301     *pnum = i;
1302     return res;
1303 }
1304 
/* Size of the I/O buffers used when reading image data, e.g. in img_compare */
#define IO_BUF_SIZE (2 * MiB)
1306 
1307 /*
1308  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1309  *
1310  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1311  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1312  * failure), and 4 on error (the exit status for read errors), after emitting
1313  * an error message.
1314  *
1315  * @param blk:  BlockBackend for the image
1316  * @param offset: Starting offset to check
1317  * @param bytes: Number of bytes to check
1318  * @param filename: Name of disk file we are checking (logging purpose)
1319  * @param buffer: Allocated buffer for storing read data
1320  * @param quiet: Flag for quiet mode
1321  */
1322 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1323                                int64_t bytes, const char *filename,
1324                                uint8_t *buffer, bool quiet)
1325 {
1326     int ret = 0;
1327     int64_t idx;
1328 
1329     ret = blk_pread(blk, offset, bytes, buffer, 0);
1330     if (ret < 0) {
1331         error_report("Error while reading offset %" PRId64 " of %s: %s",
1332                      offset, filename, strerror(-ret));
1333         return 4;
1334     }
1335     idx = find_nonzero(buffer, bytes);
1336     if (idx >= 0) {
1337         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1338                 offset + idx);
1339         return 1;
1340     }
1341 
1342     return 0;
1343 }
1344 
1345 /*
1346  * Compares two images. Exit codes:
1347  *
1348  * 0 - Images are identical or the requested help was printed
1349  * 1 - Images differ
1350  * >1 - Error occurred
1351  */
1352 static int img_compare(int argc, char **argv)
1353 {
1354     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1355     BlockBackend *blk1, *blk2;
1356     BlockDriverState *bs1, *bs2;
1357     int64_t total_size1, total_size2;
1358     uint8_t *buf1 = NULL, *buf2 = NULL;
1359     int64_t pnum1, pnum2;
1360     int allocated1, allocated2;
1361     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1362     bool progress = false, quiet = false, strict = false;
1363     int flags;
1364     bool writethrough;
1365     int64_t total_size;
1366     int64_t offset = 0;
1367     int64_t chunk;
1368     int c;
1369     uint64_t progress_base;
1370     bool image_opts = false;
1371     bool force_share = false;
1372 
1373     cache = BDRV_DEFAULT_CACHE;
1374     for (;;) {
1375         static const struct option long_options[] = {
1376             {"help", no_argument, 0, 'h'},
1377             {"object", required_argument, 0, OPTION_OBJECT},
1378             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1379             {"force-share", no_argument, 0, 'U'},
1380             {0, 0, 0, 0}
1381         };
1382         c = getopt_long(argc, argv, ":hf:F:T:pqsU",
1383                         long_options, NULL);
1384         if (c == -1) {
1385             break;
1386         }
1387         switch (c) {
1388         case ':':
1389             missing_argument(argv[optind - 1]);
1390             break;
1391         case '?':
1392             unrecognized_option(argv[optind - 1]);
1393             break;
1394         case 'h':
1395             help();
1396             break;
1397         case 'f':
1398             fmt1 = optarg;
1399             break;
1400         case 'F':
1401             fmt2 = optarg;
1402             break;
1403         case 'T':
1404             cache = optarg;
1405             break;
1406         case 'p':
1407             progress = true;
1408             break;
1409         case 'q':
1410             quiet = true;
1411             break;
1412         case 's':
1413             strict = true;
1414             break;
1415         case 'U':
1416             force_share = true;
1417             break;
1418         case OPTION_OBJECT:
1419             {
1420                 Error *local_err = NULL;
1421 
1422                 if (!user_creatable_add_from_str(optarg, &local_err)) {
1423                     if (local_err) {
1424                         error_report_err(local_err);
1425                         exit(2);
1426                     } else {
1427                         /* Help was printed */
1428                         exit(EXIT_SUCCESS);
1429                     }
1430                 }
1431                 break;
1432             }
1433         case OPTION_IMAGE_OPTS:
1434             image_opts = true;
1435             break;
1436         }
1437     }
1438 
1439     /* Progress is not shown in Quiet mode */
1440     if (quiet) {
1441         progress = false;
1442     }
1443 
1444 
1445     if (optind != argc - 2) {
1446         error_exit("Expecting two image file names");
1447     }
1448     filename1 = argv[optind++];
1449     filename2 = argv[optind++];
1450 
1451     /* Initialize before goto out */
1452     qemu_progress_init(progress, 2.0);
1453 
1454     flags = 0;
1455     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1456     if (ret < 0) {
1457         error_report("Invalid source cache option: %s", cache);
1458         ret = 2;
1459         goto out3;
1460     }
1461 
1462     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
1463                     force_share);
1464     if (!blk1) {
1465         ret = 2;
1466         goto out3;
1467     }
1468 
1469     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
1470                     force_share);
1471     if (!blk2) {
1472         ret = 2;
1473         goto out2;
1474     }
1475     bs1 = blk_bs(blk1);
1476     bs2 = blk_bs(blk2);
1477 
1478     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1479     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1480     total_size1 = blk_getlength(blk1);
1481     if (total_size1 < 0) {
1482         error_report("Can't get size of %s: %s",
1483                      filename1, strerror(-total_size1));
1484         ret = 4;
1485         goto out;
1486     }
1487     total_size2 = blk_getlength(blk2);
1488     if (total_size2 < 0) {
1489         error_report("Can't get size of %s: %s",
1490                      filename2, strerror(-total_size2));
1491         ret = 4;
1492         goto out;
1493     }
1494     total_size = MIN(total_size1, total_size2);
1495     progress_base = MAX(total_size1, total_size2);
1496 
1497     qemu_progress_print(0, 100);
1498 
1499     if (strict && total_size1 != total_size2) {
1500         ret = 1;
1501         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1502         goto out;
1503     }
1504 
1505     while (offset < total_size) {
1506         int status1, status2;
1507 
1508         status1 = bdrv_block_status_above(bs1, NULL, offset,
1509                                           total_size1 - offset, &pnum1, NULL,
1510                                           NULL);
1511         if (status1 < 0) {
1512             ret = 3;
1513             error_report("Sector allocation test failed for %s", filename1);
1514             goto out;
1515         }
1516         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1517 
1518         status2 = bdrv_block_status_above(bs2, NULL, offset,
1519                                           total_size2 - offset, &pnum2, NULL,
1520                                           NULL);
1521         if (status2 < 0) {
1522             ret = 3;
1523             error_report("Sector allocation test failed for %s", filename2);
1524             goto out;
1525         }
1526         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1527 
1528         assert(pnum1 && pnum2);
1529         chunk = MIN(pnum1, pnum2);
1530 
1531         if (strict) {
1532             if (status1 != status2) {
1533                 ret = 1;
1534                 qprintf(quiet, "Strict mode: Offset %" PRId64
1535                         " block status mismatch!\n", offset);
1536                 goto out;
1537             }
1538         }
1539         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1540             /* nothing to do */
1541         } else if (allocated1 == allocated2) {
1542             if (allocated1) {
1543                 int64_t pnum;
1544 
1545                 chunk = MIN(chunk, IO_BUF_SIZE);
1546                 ret = blk_pread(blk1, offset, chunk, buf1, 0);
1547                 if (ret < 0) {
1548                     error_report("Error while reading offset %" PRId64
1549                                  " of %s: %s",
1550                                  offset, filename1, strerror(-ret));
1551                     ret = 4;
1552                     goto out;
1553                 }
1554                 ret = blk_pread(blk2, offset, chunk, buf2, 0);
1555                 if (ret < 0) {
1556                     error_report("Error while reading offset %" PRId64
1557                                  " of %s: %s",
1558                                  offset, filename2, strerror(-ret));
1559                     ret = 4;
1560                     goto out;
1561                 }
1562                 ret = compare_buffers(buf1, buf2, chunk, &pnum);
1563                 if (ret || pnum != chunk) {
1564                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1565                             offset + (ret ? 0 : pnum));
1566                     ret = 1;
1567                     goto out;
1568                 }
1569             }
1570         } else {
1571             chunk = MIN(chunk, IO_BUF_SIZE);
1572             if (allocated1) {
1573                 ret = check_empty_sectors(blk1, offset, chunk,
1574                                           filename1, buf1, quiet);
1575             } else {
1576                 ret = check_empty_sectors(blk2, offset, chunk,
1577                                           filename2, buf1, quiet);
1578             }
1579             if (ret) {
1580                 goto out;
1581             }
1582         }
1583         offset += chunk;
1584         qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1585     }
1586 
1587     if (total_size1 != total_size2) {
1588         BlockBackend *blk_over;
1589         const char *filename_over;
1590 
1591         qprintf(quiet, "Warning: Image size mismatch!\n");
1592         if (total_size1 > total_size2) {
1593             blk_over = blk1;
1594             filename_over = filename1;
1595         } else {
1596             blk_over = blk2;
1597             filename_over = filename2;
1598         }
1599 
1600         while (offset < progress_base) {
1601             ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
1602                                           progress_base - offset, &chunk,
1603                                           NULL, NULL);
1604             if (ret < 0) {
1605                 ret = 3;
1606                 error_report("Sector allocation test failed for %s",
1607                              filename_over);
1608                 goto out;
1609 
1610             }
1611             if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
1612                 chunk = MIN(chunk, IO_BUF_SIZE);
1613                 ret = check_empty_sectors(blk_over, offset, chunk,
1614                                           filename_over, buf1, quiet);
1615                 if (ret) {
1616                     goto out;
1617                 }
1618             }
1619             offset += chunk;
1620             qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1621         }
1622     }
1623 
1624     qprintf(quiet, "Images are identical.\n");
1625     ret = 0;
1626 
1627 out:
1628     qemu_vfree(buf1);
1629     qemu_vfree(buf2);
1630     blk_unref(blk2);
1631 out2:
1632     blk_unref(blk1);
1633 out3:
1634     qemu_progress_end();
1635     return ret;
1636 }
1637 
1638 /* Convenience wrapper around qmp_block_dirty_bitmap_merge */
1639 static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name,
1640                                   const char *src_node, const char *src_name,
1641                                   Error **errp)
1642 {
1643     BlockDirtyBitmapOrStr *merge_src;
1644     BlockDirtyBitmapOrStrList *list = NULL;
1645 
1646     merge_src = g_new0(BlockDirtyBitmapOrStr, 1);
1647     merge_src->type = QTYPE_QDICT;
1648     merge_src->u.external.node = g_strdup(src_node);
1649     merge_src->u.external.name = g_strdup(src_name);
1650     QAPI_LIST_PREPEND(list, merge_src);
1651     qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp);
1652     qapi_free_BlockDirtyBitmapOrStrList(list);
1653 }
1654 
/*
 * Classification of a range of source sectors, as stored in
 * ImgConvertState.status by convert_iteration_sectors().
 */
enum ImgConvertBlockStatus {
    /* Range is handled as data */
    BLK_DATA,
    /* Block status reported the range as zero */
    BLK_ZERO,
    /*
     * Neither data nor zero while the target has a backing file, or zero
     * past the end of the target's backing file.
     * NOTE(review): presumably the content is then taken from the target's
     * backing file -- confirm against the write path.
     */
    BLK_BACKING_FILE,
};
1660 
/* Capacity of the per-coroutine arrays in ImgConvertState */
#define MAX_COROUTINES 16
/* NOTE(review): presumably the throttle group name used by convert -- confirm */
#define CONVERT_THROTTLE_GROUP "img_convert"
1663 
1664 typedef struct ImgConvertState {
1665     BlockBackend **src;
1666     int64_t *src_sectors;
1667     int *src_alignment;
1668     int src_num;
1669     int64_t total_sectors;
1670     int64_t allocated_sectors;
1671     int64_t allocated_done;
1672     int64_t sector_num;
1673     int64_t wr_offs;
1674     enum ImgConvertBlockStatus status;
1675     int64_t sector_next_status;
1676     BlockBackend *target;
1677     bool has_zero_init;
1678     bool compressed;
1679     bool target_is_new;
1680     bool target_has_backing;
1681     int64_t target_backing_sectors; /* negative if unknown */
1682     bool wr_in_order;
1683     bool copy_range;
1684     bool salvage;
1685     bool quiet;
1686     int min_sparse;
1687     int alignment;
1688     size_t cluster_sectors;
1689     size_t buf_sectors;
1690     long num_coroutines;
1691     int running_coroutines;
1692     Coroutine *co[MAX_COROUTINES];
1693     int64_t wait_sector_num[MAX_COROUTINES];
1694     CoMutex lock;
1695     int ret;
1696 } ImgConvertState;
1697 
1698 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1699                                 int *src_cur, int64_t *src_cur_offset)
1700 {
1701     *src_cur = 0;
1702     *src_cur_offset = 0;
1703     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1704         *src_cur_offset += s->src_sectors[*src_cur];
1705         (*src_cur)++;
1706         assert(*src_cur < s->src_num);
1707     }
1708 }
1709 
1710 static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1711 {
1712     int64_t src_cur_offset;
1713     int ret, n, src_cur;
1714     bool post_backing_zero = false;
1715 
1716     convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1717 
1718     assert(s->total_sectors > sector_num);
1719     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1720 
1721     if (s->target_backing_sectors >= 0) {
1722         if (sector_num >= s->target_backing_sectors) {
1723             post_backing_zero = true;
1724         } else if (sector_num + n > s->target_backing_sectors) {
1725             /* Split requests around target_backing_sectors (because
1726              * starting from there, zeros are handled differently) */
1727             n = s->target_backing_sectors - sector_num;
1728         }
1729     }
1730 
1731     if (s->sector_next_status <= sector_num) {
1732         uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
1733         int64_t count;
1734         int tail;
1735         BlockDriverState *src_bs = blk_bs(s->src[src_cur]);
1736         BlockDriverState *base;
1737 
1738         if (s->target_has_backing) {
1739             base = bdrv_cow_bs(bdrv_skip_filters(src_bs));
1740         } else {
1741             base = NULL;
1742         }
1743 
1744         do {
1745             count = n * BDRV_SECTOR_SIZE;
1746 
1747             ret = bdrv_block_status_above(src_bs, base, offset, count, &count,
1748                                           NULL, NULL);
1749 
1750             if (ret < 0) {
1751                 if (s->salvage) {
1752                     if (n == 1) {
1753                         if (!s->quiet) {
1754                             warn_report("error while reading block status at "
1755                                         "offset %" PRIu64 ": %s", offset,
1756                                         strerror(-ret));
1757                         }
1758                         /* Just try to read the data, then */
1759                         ret = BDRV_BLOCK_DATA;
1760                         count = BDRV_SECTOR_SIZE;
1761                     } else {
1762                         /* Retry on a shorter range */
1763                         n = DIV_ROUND_UP(n, 4);
1764                     }
1765                 } else {
1766                     error_report("error while reading block status at offset "
1767                                  "%" PRIu64 ": %s", offset, strerror(-ret));
1768                     return ret;
1769                 }
1770             }
1771         } while (ret < 0);
1772 
1773         n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
1774 
1775         /*
1776          * Avoid that s->sector_next_status becomes unaligned to the source
1777          * request alignment and/or cluster size to avoid unnecessary read
1778          * cycles.
1779          */
1780         tail = (sector_num - src_cur_offset + n) % s->src_alignment[src_cur];
1781         if (n > tail) {
1782             n -= tail;
1783         }
1784 
1785         if (ret & BDRV_BLOCK_ZERO) {
1786             s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
1787         } else if (ret & BDRV_BLOCK_DATA) {
1788             s->status = BLK_DATA;
1789         } else {
1790             s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
1791         }
1792 
1793         s->sector_next_status = sector_num + n;
1794     }
1795 
1796     n = MIN(n, s->sector_next_status - sector_num);
1797     if (s->status == BLK_DATA) {
1798         n = MIN(n, s->buf_sectors);
1799     }
1800 
1801     /* We need to write complete clusters for compressed images, so if an
1802      * unallocated area is shorter than that, we must consider the whole
1803      * cluster allocated. */
1804     if (s->compressed) {
1805         if (n < s->cluster_sectors) {
1806             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1807             s->status = BLK_DATA;
1808         } else {
1809             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1810         }
1811     }
1812 
1813     return n;
1814 }
1815 
1816 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1817                                         int nb_sectors, uint8_t *buf)
1818 {
1819     uint64_t single_read_until = 0;
1820     int n, ret;
1821 
1822     assert(nb_sectors <= s->buf_sectors);
1823     while (nb_sectors > 0) {
1824         BlockBackend *blk;
1825         int src_cur;
1826         int64_t bs_sectors, src_cur_offset;
1827         uint64_t offset;
1828 
1829         /* In the case of compression with multiple source files, we can get a
1830          * nb_sectors that spreads into the next part. So we must be able to
1831          * read across multiple BDSes for one convert_read() call. */
1832         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1833         blk = s->src[src_cur];
1834         bs_sectors = s->src_sectors[src_cur];
1835 
1836         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1837 
1838         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1839         if (single_read_until > offset) {
1840             n = 1;
1841         }
1842 
1843         ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1844         if (ret < 0) {
1845             if (s->salvage) {
1846                 if (n > 1) {
1847                     single_read_until = offset + (n << BDRV_SECTOR_BITS);
1848                     continue;
1849                 } else {
1850                     if (!s->quiet) {
1851                         warn_report("error while reading offset %" PRIu64
1852                                     ": %s", offset, strerror(-ret));
1853                     }
1854                     memset(buf, 0, BDRV_SECTOR_SIZE);
1855                 }
1856             } else {
1857                 return ret;
1858             }
1859         }
1860 
1861         sector_num += n;
1862         nb_sectors -= n;
1863         buf += n * BDRV_SECTOR_SIZE;
1864     }
1865 
1866     return 0;
1867 }
1868 
1869 
1870 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1871                                          int nb_sectors, uint8_t *buf,
1872                                          enum ImgConvertBlockStatus status)
1873 {
1874     int ret;
1875 
1876     while (nb_sectors > 0) {
1877         int n = nb_sectors;
1878         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1879 
1880         switch (status) {
1881         case BLK_BACKING_FILE:
1882             /* If we have a backing file, leave clusters unallocated that are
1883              * unallocated in the source image, so that the backing file is
1884              * visible at the respective offset. */
1885             assert(s->target_has_backing);
1886             break;
1887 
1888         case BLK_DATA:
1889             /* If we're told to keep the target fully allocated (-S 0) or there
1890              * is real non-zero data, we must write it. Otherwise we can treat
1891              * it as zero sectors.
1892              * Compressed clusters need to be written as a whole, so in that
1893              * case we can only save the write if the buffer is completely
1894              * zeroed. */
1895             if (!s->min_sparse ||
1896                 (!s->compressed &&
1897                  is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1898                                           sector_num, s->alignment)) ||
1899                 (s->compressed &&
1900                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1901             {
1902                 ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1903                                     n << BDRV_SECTOR_BITS, buf, flags);
1904                 if (ret < 0) {
1905                     return ret;
1906                 }
1907                 break;
1908             }
1909             /* fall-through */
1910 
1911         case BLK_ZERO:
1912             if (s->has_zero_init) {
1913                 assert(!s->target_has_backing);
1914                 break;
1915             }
1916             ret = blk_co_pwrite_zeroes(s->target,
1917                                        sector_num << BDRV_SECTOR_BITS,
1918                                        n << BDRV_SECTOR_BITS,
1919                                        BDRV_REQ_MAY_UNMAP);
1920             if (ret < 0) {
1921                 return ret;
1922             }
1923             break;
1924         }
1925 
1926         sector_num += n;
1927         nb_sectors -= n;
1928         buf += n * BDRV_SECTOR_SIZE;
1929     }
1930 
1931     return 0;
1932 }
1933 
1934 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1935                                               int nb_sectors)
1936 {
1937     int n, ret;
1938 
1939     while (nb_sectors > 0) {
1940         BlockBackend *blk;
1941         int src_cur;
1942         int64_t bs_sectors, src_cur_offset;
1943         int64_t offset;
1944 
1945         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1946         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1947         blk = s->src[src_cur];
1948         bs_sectors = s->src_sectors[src_cur];
1949 
1950         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1951 
1952         ret = blk_co_copy_range(blk, offset, s->target,
1953                                 sector_num << BDRV_SECTOR_BITS,
1954                                 n << BDRV_SECTOR_BITS, 0, 0);
1955         if (ret < 0) {
1956             return ret;
1957         }
1958 
1959         sector_num += n;
1960         nb_sectors -= n;
1961     }
1962     return 0;
1963 }
1964 
1965 static void coroutine_fn convert_co_do_copy(void *opaque)
1966 {
1967     ImgConvertState *s = opaque;
1968     uint8_t *buf = NULL;
1969     int ret, i;
1970     int index = -1;
1971 
1972     for (i = 0; i < s->num_coroutines; i++) {
1973         if (s->co[i] == qemu_coroutine_self()) {
1974             index = i;
1975             break;
1976         }
1977     }
1978     assert(index >= 0);
1979 
1980     s->running_coroutines++;
1981     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1982 
1983     while (1) {
1984         int n;
1985         int64_t sector_num;
1986         enum ImgConvertBlockStatus status;
1987         bool copy_range;
1988 
1989         qemu_co_mutex_lock(&s->lock);
1990         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1991             qemu_co_mutex_unlock(&s->lock);
1992             break;
1993         }
1994         WITH_GRAPH_RDLOCK_GUARD() {
1995             n = convert_iteration_sectors(s, s->sector_num);
1996         }
1997         if (n < 0) {
1998             qemu_co_mutex_unlock(&s->lock);
1999             s->ret = n;
2000             break;
2001         }
2002         /* save current sector and allocation status to local variables */
2003         sector_num = s->sector_num;
2004         status = s->status;
2005         if (!s->min_sparse && s->status == BLK_ZERO) {
2006             n = MIN(n, s->buf_sectors);
2007         }
2008         /* increment global sector counter so that other coroutines can
2009          * already continue reading beyond this request */
2010         s->sector_num += n;
2011         qemu_co_mutex_unlock(&s->lock);
2012 
2013         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
2014             s->allocated_done += n;
2015             qemu_progress_print(100.0 * s->allocated_done /
2016                                         s->allocated_sectors, 0);
2017         }
2018 
2019 retry:
2020         copy_range = s->copy_range && s->status == BLK_DATA;
2021         if (status == BLK_DATA && !copy_range) {
2022             ret = convert_co_read(s, sector_num, n, buf);
2023             if (ret < 0) {
2024                 error_report("error while reading at byte %lld: %s",
2025                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2026                 s->ret = ret;
2027             }
2028         } else if (!s->min_sparse && status == BLK_ZERO) {
2029             status = BLK_DATA;
2030             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
2031         }
2032 
2033         if (s->wr_in_order) {
2034             /* keep writes in order */
2035             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
2036                 s->wait_sector_num[index] = sector_num;
2037                 qemu_coroutine_yield();
2038             }
2039             s->wait_sector_num[index] = -1;
2040         }
2041 
2042         if (s->ret == -EINPROGRESS) {
2043             if (copy_range) {
2044                 WITH_GRAPH_RDLOCK_GUARD() {
2045                     ret = convert_co_copy_range(s, sector_num, n);
2046                 }
2047                 if (ret) {
2048                     s->copy_range = false;
2049                     goto retry;
2050                 }
2051             } else {
2052                 ret = convert_co_write(s, sector_num, n, buf, status);
2053             }
2054             if (ret < 0) {
2055                 error_report("error while writing at byte %lld: %s",
2056                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2057                 s->ret = ret;
2058             }
2059         }
2060 
2061         if (s->wr_in_order) {
2062             /* reenter the coroutine that might have waited
2063              * for this write to complete */
2064             s->wr_offs = sector_num + n;
2065             for (i = 0; i < s->num_coroutines; i++) {
2066                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
2067                     /*
2068                      * A -> B -> A cannot occur because A has
2069                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
2070                      * B will never enter A during this time window.
2071                      */
2072                     qemu_coroutine_enter(s->co[i]);
2073                     break;
2074                 }
2075             }
2076         }
2077     }
2078 
2079     qemu_vfree(buf);
2080     s->co[index] = NULL;
2081     s->running_coroutines--;
2082     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
2083         /* the convert job finished successfully */
2084         s->ret = 0;
2085     }
2086 }
2087 
2088 static int convert_do_copy(ImgConvertState *s)
2089 {
2090     int ret, i, n;
2091     int64_t sector_num = 0;
2092 
2093     /* Check whether we have zero initialisation or can get it efficiently */
2094     if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
2095         !s->target_has_backing) {
2096         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
2097     }
2098 
2099     /* Allocate buffer for copied data. For compressed images, only one cluster
2100      * can be copied at a time. */
2101     if (s->compressed) {
2102         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2103             error_report("invalid cluster size");
2104             return -EINVAL;
2105         }
2106         s->buf_sectors = s->cluster_sectors;
2107     }
2108 
2109     while (sector_num < s->total_sectors) {
2110         n = convert_iteration_sectors(s, sector_num);
2111         if (n < 0) {
2112             return n;
2113         }
2114         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2115         {
2116             s->allocated_sectors += n;
2117         }
2118         sector_num += n;
2119     }
2120 
2121     /* Do the copy */
2122     s->sector_next_status = 0;
2123     s->ret = -EINPROGRESS;
2124 
2125     qemu_co_mutex_init(&s->lock);
2126     for (i = 0; i < s->num_coroutines; i++) {
2127         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2128         s->wait_sector_num[i] = -1;
2129         qemu_coroutine_enter(s->co[i]);
2130     }
2131 
2132     while (s->running_coroutines) {
2133         main_loop_wait(false);
2134     }
2135 
2136     if (s->compressed && !s->ret) {
2137         /* signal EOF to align */
2138         ret = blk_pwrite_compressed(s->target, 0, 0, NULL);
2139         if (ret < 0) {
2140             return ret;
2141         }
2142     }
2143 
2144     return s->ret;
2145 }
2146 
2147 /* Check that bitmaps can be copied, or output an error */
2148 static int convert_check_bitmaps(BlockDriverState *src, bool skip_broken)
2149 {
2150     BdrvDirtyBitmap *bm;
2151 
2152     if (!bdrv_supports_persistent_dirty_bitmap(src)) {
2153         error_report("Source lacks bitmap support");
2154         return -1;
2155     }
2156     FOR_EACH_DIRTY_BITMAP(src, bm) {
2157         if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2158             continue;
2159         }
2160         if (!skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2161             error_report("Cannot copy inconsistent bitmap '%s'",
2162                          bdrv_dirty_bitmap_name(bm));
2163             error_printf("Try --skip-broken-bitmaps, or "
2164                          "use 'qemu-img bitmap --remove' to delete it\n");
2165             return -1;
2166         }
2167     }
2168     return 0;
2169 }
2170 
2171 static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst,
2172                                 bool skip_broken)
2173 {
2174     BdrvDirtyBitmap *bm;
2175     Error *err = NULL;
2176 
2177     FOR_EACH_DIRTY_BITMAP(src, bm) {
2178         const char *name;
2179 
2180         if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2181             continue;
2182         }
2183         name = bdrv_dirty_bitmap_name(bm);
2184         if (skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2185             warn_report("Skipping inconsistent bitmap '%s'", name);
2186             continue;
2187         }
2188         qmp_block_dirty_bitmap_add(dst->node_name, name,
2189                                    true, bdrv_dirty_bitmap_granularity(bm),
2190                                    true, true,
2191                                    true, !bdrv_dirty_bitmap_enabled(bm),
2192                                    &err);
2193         if (err) {
2194             error_reportf_err(err, "Failed to create bitmap %s: ", name);
2195             return -1;
2196         }
2197 
2198         do_dirty_bitmap_merge(dst->node_name, name, src->node_name, name,
2199                               &err);
2200         if (err) {
2201             error_reportf_err(err, "Failed to populate bitmap %s: ", name);
2202             qmp_block_dirty_bitmap_remove(dst->node_name, name, NULL);
2203             return -1;
2204         }
2205     }
2206 
2207     return 0;
2208 }
2209 
/*
 * Upper bound, in sectors, that img_convert() accepts for the size given
 * with -S (sparse detection); larger values are rejected.
 */
#define MAX_BUF_SECTORS 32768
2211 
2212 static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
2213 {
2214     ThrottleConfig cfg;
2215 
2216     throttle_config_init(&cfg);
2217     cfg.buckets[THROTTLE_BPS_WRITE].avg = rate_limit;
2218 
2219     blk_io_limits_enable(blk, CONVERT_THROTTLE_GROUP);
2220     blk_set_io_limits(blk, &cfg);
2221 }
2222 
2223 static int img_convert(int argc, char **argv)
2224 {
2225     int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
2226     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2227                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2228                *out_filename, *out_baseimg_param, *snapshot_name = NULL,
2229                *backing_fmt = NULL;
2230     BlockDriver *drv = NULL, *proto_drv = NULL;
2231     BlockDriverInfo bdi;
2232     BlockDriverState *out_bs;
2233     QemuOpts *opts = NULL, *sn_opts = NULL;
2234     QemuOptsList *create_opts = NULL;
2235     QDict *open_opts = NULL;
2236     char *options = NULL;
2237     Error *local_err = NULL;
2238     bool writethrough, src_writethrough, image_opts = false,
2239          skip_create = false, progress = false, tgt_image_opts = false;
2240     int64_t ret = -EINVAL;
2241     bool force_share = false;
2242     bool explict_min_sparse = false;
2243     bool bitmaps = false;
2244     bool skip_broken = false;
2245     int64_t rate_limit = 0;
2246 
2247     ImgConvertState s = (ImgConvertState) {
2248         /* Need at least 4k of zeros for sparse detection */
2249         .min_sparse         = 8,
2250         .copy_range         = false,
2251         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2252         .wr_in_order        = true,
2253         .num_coroutines     = 8,
2254     };
2255 
2256     for(;;) {
2257         static const struct option long_options[] = {
2258             {"help", no_argument, 0, 'h'},
2259             {"object", required_argument, 0, OPTION_OBJECT},
2260             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2261             {"force-share", no_argument, 0, 'U'},
2262             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2263             {"salvage", no_argument, 0, OPTION_SALVAGE},
2264             {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2265             {"bitmaps", no_argument, 0, OPTION_BITMAPS},
2266             {"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN},
2267             {0, 0, 0, 0}
2268         };
2269         c = getopt_long(argc, argv, ":hf:O:B:CcF:o:l:S:pt:T:qnm:WUr:",
2270                         long_options, NULL);
2271         if (c == -1) {
2272             break;
2273         }
2274         switch(c) {
2275         case ':':
2276             missing_argument(argv[optind - 1]);
2277             break;
2278         case '?':
2279             unrecognized_option(argv[optind - 1]);
2280             break;
2281         case 'h':
2282             help();
2283             break;
2284         case 'f':
2285             fmt = optarg;
2286             break;
2287         case 'O':
2288             out_fmt = optarg;
2289             break;
2290         case 'B':
2291             out_baseimg = optarg;
2292             break;
2293         case 'C':
2294             s.copy_range = true;
2295             break;
2296         case 'c':
2297             s.compressed = true;
2298             break;
2299         case 'F':
2300             backing_fmt = optarg;
2301             break;
2302         case 'o':
2303             if (accumulate_options(&options, optarg) < 0) {
2304                 goto fail_getopt;
2305             }
2306             break;
2307         case 'l':
2308             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2309                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2310                                                   optarg, false);
2311                 if (!sn_opts) {
2312                     error_report("Failed in parsing snapshot param '%s'",
2313                                  optarg);
2314                     goto fail_getopt;
2315                 }
2316             } else {
2317                 snapshot_name = optarg;
2318             }
2319             break;
2320         case 'S':
2321         {
2322             int64_t sval;
2323 
2324             sval = cvtnum("buffer size for sparse output", optarg);
2325             if (sval < 0) {
2326                 goto fail_getopt;
2327             } else if (!QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2328                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2329                 error_report("Invalid buffer size for sparse output specified. "
2330                     "Valid sizes are multiples of %llu up to %llu. Select "
2331                     "0 to disable sparse detection (fully allocates output).",
2332                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2333                 goto fail_getopt;
2334             }
2335 
2336             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2337             explict_min_sparse = true;
2338             break;
2339         }
2340         case 'p':
2341             progress = true;
2342             break;
2343         case 't':
2344             cache = optarg;
2345             break;
2346         case 'T':
2347             src_cache = optarg;
2348             break;
2349         case 'q':
2350             s.quiet = true;
2351             break;
2352         case 'n':
2353             skip_create = true;
2354             break;
2355         case 'm':
2356             if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2357                 s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2358                 error_report("Invalid number of coroutines. Allowed number of"
2359                              " coroutines is between 1 and %d", MAX_COROUTINES);
2360                 goto fail_getopt;
2361             }
2362             break;
2363         case 'W':
2364             s.wr_in_order = false;
2365             break;
2366         case 'U':
2367             force_share = true;
2368             break;
2369         case 'r':
2370             rate_limit = cvtnum("rate limit", optarg);
2371             if (rate_limit < 0) {
2372                 goto fail_getopt;
2373             }
2374             break;
2375         case OPTION_OBJECT:
2376             user_creatable_process_cmdline(optarg);
2377             break;
2378         case OPTION_IMAGE_OPTS:
2379             image_opts = true;
2380             break;
2381         case OPTION_SALVAGE:
2382             s.salvage = true;
2383             break;
2384         case OPTION_TARGET_IMAGE_OPTS:
2385             tgt_image_opts = true;
2386             break;
2387         case OPTION_TARGET_IS_ZERO:
2388             /*
2389              * The user asserting that the target is blank has the
2390              * same effect as the target driver supporting zero
2391              * initialisation.
2392              */
2393             s.has_zero_init = true;
2394             break;
2395         case OPTION_BITMAPS:
2396             bitmaps = true;
2397             break;
2398         case OPTION_SKIP_BROKEN:
2399             skip_broken = true;
2400             break;
2401         }
2402     }
2403 
2404     if (!out_fmt && !tgt_image_opts) {
2405         out_fmt = "raw";
2406     }
2407 
2408     if (skip_broken && !bitmaps) {
2409         error_report("Use of --skip-broken-bitmaps requires --bitmaps");
2410         goto fail_getopt;
2411     }
2412 
2413     if (s.compressed && s.copy_range) {
2414         error_report("Cannot enable copy offloading when -c is used");
2415         goto fail_getopt;
2416     }
2417 
2418     if (explict_min_sparse && s.copy_range) {
2419         error_report("Cannot enable copy offloading when -S is used");
2420         goto fail_getopt;
2421     }
2422 
2423     if (s.copy_range && s.salvage) {
2424         error_report("Cannot use copy offloading in salvaging mode");
2425         goto fail_getopt;
2426     }
2427 
2428     if (tgt_image_opts && !skip_create) {
2429         error_report("--target-image-opts requires use of -n flag");
2430         goto fail_getopt;
2431     }
2432 
2433     if (skip_create && options) {
2434         error_report("-o has no effect when skipping image creation");
2435         goto fail_getopt;
2436     }
2437 
2438     if (s.has_zero_init && !skip_create) {
2439         error_report("--target-is-zero requires use of -n flag");
2440         goto fail_getopt;
2441     }
2442 
2443     s.src_num = argc - optind - 1;
2444     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2445 
2446     if (options && has_help_option(options)) {
2447         if (out_fmt) {
2448             ret = print_block_option_help(out_filename, out_fmt);
2449             goto fail_getopt;
2450         } else {
2451             error_report("Option help requires a format be specified");
2452             goto fail_getopt;
2453         }
2454     }
2455 
2456     if (s.src_num < 1) {
2457         error_report("Must specify image file name");
2458         goto fail_getopt;
2459     }
2460 
2461     /* ret is still -EINVAL until here */
2462     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2463     if (ret < 0) {
2464         error_report("Invalid source cache option: %s", src_cache);
2465         goto fail_getopt;
2466     }
2467 
2468     /* Initialize before goto out */
2469     if (s.quiet) {
2470         progress = false;
2471     }
2472     qemu_progress_init(progress, 1.0);
2473     qemu_progress_print(0, 100);
2474 
2475     s.src = g_new0(BlockBackend *, s.src_num);
2476     s.src_sectors = g_new(int64_t, s.src_num);
2477     s.src_alignment = g_new(int, s.src_num);
2478 
2479     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2480         BlockDriverState *src_bs;
2481         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2482                                fmt, src_flags, src_writethrough, s.quiet,
2483                                force_share);
2484         if (!s.src[bs_i]) {
2485             ret = -1;
2486             goto out;
2487         }
2488         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2489         if (s.src_sectors[bs_i] < 0) {
2490             error_report("Could not get size of %s: %s",
2491                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2492             ret = -1;
2493             goto out;
2494         }
2495         src_bs = blk_bs(s.src[bs_i]);
2496         s.src_alignment[bs_i] = DIV_ROUND_UP(src_bs->bl.request_alignment,
2497                                              BDRV_SECTOR_SIZE);
2498         if (!bdrv_get_info(src_bs, &bdi)) {
2499             s.src_alignment[bs_i] = MAX(s.src_alignment[bs_i],
2500                                         bdi.cluster_size / BDRV_SECTOR_SIZE);
2501         }
2502         s.total_sectors += s.src_sectors[bs_i];
2503     }
2504 
2505     if (sn_opts) {
2506         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2507                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2508                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2509                                &local_err);
2510     } else if (snapshot_name != NULL) {
2511         if (s.src_num > 1) {
2512             error_report("No support for concatenating multiple snapshot");
2513             ret = -1;
2514             goto out;
2515         }
2516 
2517         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2518                                              &local_err);
2519     }
2520     if (local_err) {
2521         error_reportf_err(local_err, "Failed to load snapshot: ");
2522         ret = -1;
2523         goto out;
2524     }
2525 
2526     if (!skip_create) {
2527         /* Find driver and parse its options */
2528         drv = bdrv_find_format(out_fmt);
2529         if (!drv) {
2530             error_report("Unknown file format '%s'", out_fmt);
2531             ret = -1;
2532             goto out;
2533         }
2534 
2535         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2536         if (!proto_drv) {
2537             error_report_err(local_err);
2538             ret = -1;
2539             goto out;
2540         }
2541 
2542         if (!drv->create_opts) {
2543             error_report("Format driver '%s' does not support image creation",
2544                          drv->format_name);
2545             ret = -1;
2546             goto out;
2547         }
2548 
2549         if (!proto_drv->create_opts) {
2550             error_report("Protocol driver '%s' does not support image creation",
2551                          proto_drv->format_name);
2552             ret = -1;
2553             goto out;
2554         }
2555 
2556         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2557         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2558 
2559         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2560         if (options) {
2561             if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
2562                 error_report_err(local_err);
2563                 ret = -1;
2564                 goto out;
2565             }
2566         }
2567 
2568         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
2569                             s.total_sectors * BDRV_SECTOR_SIZE, &error_abort);
2570         ret = add_old_style_options(out_fmt, opts, out_baseimg, backing_fmt);
2571         if (ret < 0) {
2572             goto out;
2573         }
2574     }
2575 
2576     /* Get backing file name if -o backing_file was used */
2577     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2578     if (out_baseimg_param) {
2579         out_baseimg = out_baseimg_param;
2580     }
2581     s.target_has_backing = (bool) out_baseimg;
2582 
2583     if (s.has_zero_init && s.target_has_backing) {
2584         error_report("Cannot use --target-is-zero when the destination "
2585                      "image has a backing file");
2586         goto out;
2587     }
2588 
2589     if (s.src_num > 1 && out_baseimg) {
2590         error_report("Having a backing file for the target makes no sense when "
2591                      "concatenating multiple input images");
2592         ret = -1;
2593         goto out;
2594     }
2595 
2596     if (out_baseimg_param) {
2597         if (!qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT)) {
2598             error_report("Use of backing file requires explicit "
2599                          "backing format");
2600             ret = -1;
2601             goto out;
2602         }
2603     }
2604 
2605     /* Check if compression is supported */
2606     if (s.compressed) {
2607         bool encryption =
2608             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2609         const char *encryptfmt =
2610             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2611         const char *preallocation =
2612             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2613 
2614         if (drv && !block_driver_can_compress(drv)) {
2615             error_report("Compression not supported for this file format");
2616             ret = -1;
2617             goto out;
2618         }
2619 
2620         if (encryption || encryptfmt) {
2621             error_report("Compression and encryption not supported at "
2622                          "the same time");
2623             ret = -1;
2624             goto out;
2625         }
2626 
2627         if (preallocation
2628             && strcmp(preallocation, "off"))
2629         {
2630             error_report("Compression and preallocation not supported at "
2631                          "the same time");
2632             ret = -1;
2633             goto out;
2634         }
2635     }
2636 
2637     /* Determine if bitmaps need copying */
2638     if (bitmaps) {
2639         if (s.src_num > 1) {
2640             error_report("Copying bitmaps only possible with single source");
2641             ret = -1;
2642             goto out;
2643         }
2644         ret = convert_check_bitmaps(blk_bs(s.src[0]), skip_broken);
2645         if (ret < 0) {
2646             goto out;
2647         }
2648     }
2649 
2650     /*
2651      * The later open call will need any decryption secrets, and
2652      * bdrv_create() will purge "opts", so extract them now before
2653      * they are lost.
2654      */
2655     if (!skip_create) {
2656         open_opts = qdict_new();
2657         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2658 
2659         /* Create the new image */
2660         ret = bdrv_create(drv, out_filename, opts, &local_err);
2661         if (ret < 0) {
2662             error_reportf_err(local_err, "%s: error while converting %s: ",
2663                               out_filename, out_fmt);
2664             goto out;
2665         }
2666     }
2667 
2668     s.target_is_new = !skip_create;
2669 
2670     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2671     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2672     if (ret < 0) {
2673         error_report("Invalid cache option: %s", cache);
2674         goto out;
2675     }
2676 
2677     if (flags & BDRV_O_NOCACHE) {
2678         /*
2679          * If we open the target with O_DIRECT, it may be necessary to
2680          * extend its size to align to the physical sector size.
2681          */
2682         flags |= BDRV_O_RESIZE;
2683     }
2684 
2685     if (skip_create) {
2686         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2687                             flags, writethrough, s.quiet, false);
2688     } else {
2689         /* TODO ultimately we should allow --target-image-opts
2690          * to be used even when -n is not given.
2691          * That has to wait for bdrv_create to be improved
2692          * to allow filenames in option syntax
2693          */
2694         s.target = img_open_file(out_filename, open_opts, out_fmt,
2695                                  flags, writethrough, s.quiet, false);
2696         open_opts = NULL; /* blk_new_open will have freed it */
2697     }
2698     if (!s.target) {
2699         ret = -1;
2700         goto out;
2701     }
2702     out_bs = blk_bs(s.target);
2703 
2704     if (bitmaps && !bdrv_supports_persistent_dirty_bitmap(out_bs)) {
2705         error_report("Format driver '%s' does not support bitmaps",
2706                      out_bs->drv->format_name);
2707         ret = -1;
2708         goto out;
2709     }
2710 
2711     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2712         error_report("Compression not supported for this file format");
2713         ret = -1;
2714         goto out;
2715     }
2716 
2717     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2718      * or discard_alignment of the out_bs is greater. Limit to
2719      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2720     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2721                         MAX(s.buf_sectors,
2722                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2723                                 out_bs->bl.pdiscard_alignment >>
2724                                 BDRV_SECTOR_BITS)));
2725 
2726     /* try to align the write requests to the destination to avoid unnecessary
2727      * RMW cycles. */
2728     s.alignment = MAX(pow2floor(s.min_sparse),
2729                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2730                                    BDRV_SECTOR_SIZE));
2731     assert(is_power_of_2(s.alignment));
2732 
2733     if (skip_create) {
2734         int64_t output_sectors = blk_nb_sectors(s.target);
2735         if (output_sectors < 0) {
2736             error_report("unable to get output image length: %s",
2737                          strerror(-output_sectors));
2738             ret = -1;
2739             goto out;
2740         } else if (output_sectors < s.total_sectors) {
2741             error_report("output file is smaller than input file");
2742             ret = -1;
2743             goto out;
2744         }
2745     }
2746 
2747     if (s.target_has_backing && s.target_is_new) {
2748         /* Errors are treated as "backing length unknown" (which means
2749          * s.target_backing_sectors has to be negative, which it will
2750          * be automatically).  The backing file length is used only
2751          * for optimizations, so such a case is not fatal. */
2752         s.target_backing_sectors =
2753             bdrv_nb_sectors(bdrv_backing_chain_next(out_bs));
2754     } else {
2755         s.target_backing_sectors = -1;
2756     }
2757 
2758     ret = bdrv_get_info(out_bs, &bdi);
2759     if (ret < 0) {
2760         if (s.compressed) {
2761             error_report("could not get block driver info");
2762             goto out;
2763         }
2764     } else {
2765         s.compressed = s.compressed || bdi.needs_compressed_writes;
2766         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2767     }
2768 
2769     if (rate_limit) {
2770         set_rate_limit(s.target, rate_limit);
2771     }
2772 
2773     ret = convert_do_copy(&s);
2774 
2775     /* Now copy the bitmaps */
2776     if (bitmaps && ret == 0) {
2777         ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs, skip_broken);
2778     }
2779 
2780 out:
2781     if (!ret) {
2782         qemu_progress_print(100, 0);
2783     }
2784     qemu_progress_end();
2785     qemu_opts_del(opts);
2786     qemu_opts_free(create_opts);
2787     qobject_unref(open_opts);
2788     blk_unref(s.target);
2789     if (s.src) {
2790         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2791             blk_unref(s.src[bs_i]);
2792         }
2793         g_free(s.src);
2794     }
2795     g_free(s.src_sectors);
2796     g_free(s.src_alignment);
2797 fail_getopt:
2798     qemu_opts_del(sn_opts);
2799     g_free(options);
2800 
2801     return !!ret;
2802 }
2803 
2804 
2805 static void dump_snapshots(BlockDriverState *bs)
2806 {
2807     QEMUSnapshotInfo *sn_tab, *sn;
2808     int nb_sns, i;
2809 
2810     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2811     if (nb_sns <= 0)
2812         return;
2813     printf("Snapshot list:\n");
2814     bdrv_snapshot_dump(NULL);
2815     printf("\n");
2816     for(i = 0; i < nb_sns; i++) {
2817         sn = &sn_tab[i];
2818         bdrv_snapshot_dump(sn);
2819         printf("\n");
2820     }
2821     g_free(sn_tab);
2822 }
2823 
2824 static void dump_json_block_graph_info_list(BlockGraphInfoList *list)
2825 {
2826     GString *str;
2827     QObject *obj;
2828     Visitor *v = qobject_output_visitor_new(&obj);
2829 
2830     visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort);
2831     visit_complete(v, &obj);
2832     str = qobject_to_json_pretty(obj, true);
2833     assert(str != NULL);
2834     printf("%s\n", str->str);
2835     qobject_unref(obj);
2836     visit_free(v);
2837     g_string_free(str, true);
2838 }
2839 
2840 static void dump_json_block_graph_info(BlockGraphInfo *info)
2841 {
2842     GString *str;
2843     QObject *obj;
2844     Visitor *v = qobject_output_visitor_new(&obj);
2845 
2846     visit_type_BlockGraphInfo(v, NULL, &info, &error_abort);
2847     visit_complete(v, &obj);
2848     str = qobject_to_json_pretty(obj, true);
2849     assert(str != NULL);
2850     printf("%s\n", str->str);
2851     qobject_unref(obj);
2852     visit_free(v);
2853     g_string_free(str, true);
2854 }
2855 
2856 static void dump_human_image_info(BlockGraphInfo *info, int indentation,
2857                                   const char *path)
2858 {
2859     BlockChildInfoList *children_list;
2860 
2861     bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation,
2862                         info->children == NULL);
2863 
2864     for (children_list = info->children; children_list;
2865          children_list = children_list->next)
2866     {
2867         BlockChildInfo *child = children_list->value;
2868         g_autofree char *child_path = NULL;
2869 
2870         printf("%*sChild node '%s%s':\n",
2871                indentation * 4, "", path, child->name);
2872         child_path = g_strdup_printf("%s%s/", path, child->name);
2873         dump_human_image_info(child->info, indentation + 1, child_path);
2874     }
2875 }
2876 
2877 static void dump_human_image_info_list(BlockGraphInfoList *list)
2878 {
2879     BlockGraphInfoList *elem;
2880     bool delim = false;
2881 
2882     for (elem = list; elem; elem = elem->next) {
2883         if (delim) {
2884             printf("\n");
2885         }
2886         delim = true;
2887 
2888         dump_human_image_info(elem->value, 0, "/");
2889     }
2890 }
2891 
2892 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2893 {
2894     return strcmp(a, b) == 0;
2895 }
2896 
2897 /**
2898  * Open an image file chain and return an BlockGraphInfoList
2899  *
2900  * @filename: topmost image filename
2901  * @fmt: topmost image format (may be NULL to autodetect)
2902  * @chain: true  - enumerate entire backing file chain
2903  *         false - only topmost image file
2904  *
2905  * Returns a list of BlockNodeInfo objects or NULL if there was an error
2906  * opening an image file.  If there was an error a message will have been
2907  * printed to stderr.
2908  */
2909 static BlockGraphInfoList *collect_image_info_list(bool image_opts,
2910                                                    const char *filename,
2911                                                    const char *fmt,
2912                                                    bool chain, bool force_share)
2913 {
2914     BlockGraphInfoList *head = NULL;
2915     BlockGraphInfoList **tail = &head;
2916     GHashTable *filenames;
2917     Error *err = NULL;
2918 
2919     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2920 
2921     while (filename) {
2922         BlockBackend *blk;
2923         BlockDriverState *bs;
2924         BlockGraphInfo *info;
2925 
2926         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2927             error_report("Backing file '%s' creates an infinite loop.",
2928                          filename);
2929             goto err;
2930         }
2931         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2932 
2933         blk = img_open(image_opts, filename, fmt,
2934                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
2935                        force_share);
2936         if (!blk) {
2937             goto err;
2938         }
2939         bs = blk_bs(blk);
2940 
2941         /*
2942          * Note that the returned BlockGraphInfo object will not have
2943          * information about this image's backing node, because we have opened
2944          * it with BDRV_O_NO_BACKING.  Printing this object will therefore not
2945          * duplicate the backing chain information that we obtain by walking
2946          * the chain manually here.
2947          */
2948         bdrv_graph_rdlock_main_loop();
2949         bdrv_query_block_graph_info(bs, &info, &err);
2950         bdrv_graph_rdunlock_main_loop();
2951 
2952         if (err) {
2953             error_report_err(err);
2954             blk_unref(blk);
2955             goto err;
2956         }
2957 
2958         QAPI_LIST_APPEND(tail, info);
2959 
2960         blk_unref(blk);
2961 
2962         /* Clear parameters that only apply to the topmost image */
2963         filename = fmt = NULL;
2964         image_opts = false;
2965 
2966         if (chain) {
2967             if (info->full_backing_filename) {
2968                 filename = info->full_backing_filename;
2969             } else if (info->backing_filename) {
2970                 error_report("Could not determine absolute backing filename,"
2971                              " but backing filename '%s' present",
2972                              info->backing_filename);
2973                 goto err;
2974             }
2975             if (info->backing_filename_format) {
2976                 fmt = info->backing_filename_format;
2977             }
2978         }
2979     }
2980     g_hash_table_destroy(filenames);
2981     return head;
2982 
2983 err:
2984     qapi_free_BlockGraphInfoList(head);
2985     g_hash_table_destroy(filenames);
2986     return NULL;
2987 }
2988 
2989 static int img_info(int argc, char **argv)
2990 {
2991     int c;
2992     OutputFormat output_format = OFORMAT_HUMAN;
2993     bool chain = false;
2994     const char *filename, *fmt, *output;
2995     BlockGraphInfoList *list;
2996     bool image_opts = false;
2997     bool force_share = false;
2998 
2999     fmt = NULL;
3000     output = NULL;
3001     for(;;) {
3002         int option_index = 0;
3003         static const struct option long_options[] = {
3004             {"help", no_argument, 0, 'h'},
3005             {"format", required_argument, 0, 'f'},
3006             {"output", required_argument, 0, OPTION_OUTPUT},
3007             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
3008             {"object", required_argument, 0, OPTION_OBJECT},
3009             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3010             {"force-share", no_argument, 0, 'U'},
3011             {0, 0, 0, 0}
3012         };
3013         c = getopt_long(argc, argv, ":f:hU",
3014                         long_options, &option_index);
3015         if (c == -1) {
3016             break;
3017         }
3018         switch(c) {
3019         case ':':
3020             missing_argument(argv[optind - 1]);
3021             break;
3022         case '?':
3023             unrecognized_option(argv[optind - 1]);
3024             break;
3025         case 'h':
3026             help();
3027             break;
3028         case 'f':
3029             fmt = optarg;
3030             break;
3031         case 'U':
3032             force_share = true;
3033             break;
3034         case OPTION_OUTPUT:
3035             output = optarg;
3036             break;
3037         case OPTION_BACKING_CHAIN:
3038             chain = true;
3039             break;
3040         case OPTION_OBJECT:
3041             user_creatable_process_cmdline(optarg);
3042             break;
3043         case OPTION_IMAGE_OPTS:
3044             image_opts = true;
3045             break;
3046         }
3047     }
3048     if (optind != argc - 1) {
3049         error_exit("Expecting one image file name");
3050     }
3051     filename = argv[optind++];
3052 
3053     if (output && !strcmp(output, "json")) {
3054         output_format = OFORMAT_JSON;
3055     } else if (output && !strcmp(output, "human")) {
3056         output_format = OFORMAT_HUMAN;
3057     } else if (output) {
3058         error_report("--output must be used with human or json as argument.");
3059         return 1;
3060     }
3061 
3062     list = collect_image_info_list(image_opts, filename, fmt, chain,
3063                                    force_share);
3064     if (!list) {
3065         return 1;
3066     }
3067 
3068     switch (output_format) {
3069     case OFORMAT_HUMAN:
3070         dump_human_image_info_list(list);
3071         break;
3072     case OFORMAT_JSON:
3073         if (chain) {
3074             dump_json_block_graph_info_list(list);
3075         } else {
3076             dump_json_block_graph_info(list->value);
3077         }
3078         break;
3079     }
3080 
3081     qapi_free_BlockGraphInfoList(list);
3082     return 0;
3083 }
3084 
3085 static int dump_map_entry(OutputFormat output_format, MapEntry *e,
3086                           MapEntry *next)
3087 {
3088     switch (output_format) {
3089     case OFORMAT_HUMAN:
3090         if (e->data && !e->has_offset) {
3091             error_report("File contains external, encrypted or compressed clusters.");
3092             return -1;
3093         }
3094         if (e->data && !e->zero) {
3095             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
3096                    e->start, e->length,
3097                    e->has_offset ? e->offset : 0,
3098                    e->filename ?: "");
3099         }
3100         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
3101          * Modify the flags here to allow more coalescing.
3102          */
3103         if (next && (!next->data || next->zero)) {
3104             next->data = false;
3105             next->zero = true;
3106         }
3107         break;
3108     case OFORMAT_JSON:
3109         printf("{ \"start\": %"PRId64", \"length\": %"PRId64","
3110                " \"depth\": %"PRId64", \"present\": %s, \"zero\": %s,"
3111                " \"data\": %s, \"compressed\": %s",
3112                e->start, e->length, e->depth,
3113                e->present ? "true" : "false",
3114                e->zero ? "true" : "false",
3115                e->data ? "true" : "false",
3116                e->compressed ? "true" : "false");
3117         if (e->has_offset) {
3118             printf(", \"offset\": %"PRId64"", e->offset);
3119         }
3120         putchar('}');
3121 
3122         if (next) {
3123             puts(",");
3124         }
3125         break;
3126     }
3127     return 0;
3128 }
3129 
3130 static int get_block_status(BlockDriverState *bs, int64_t offset,
3131                             int64_t bytes, MapEntry *e)
3132 {
3133     int ret;
3134     int depth;
3135     BlockDriverState *file;
3136     bool has_offset;
3137     int64_t map;
3138     char *filename = NULL;
3139 
3140     /* As an optimization, we could cache the current range of unallocated
3141      * clusters in each file of the chain, and avoid querying the same
3142      * range repeatedly.
3143      */
3144 
3145     depth = 0;
3146     for (;;) {
3147         bs = bdrv_skip_filters(bs);
3148         ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
3149         if (ret < 0) {
3150             return ret;
3151         }
3152         assert(bytes);
3153         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
3154             break;
3155         }
3156         bs = bdrv_cow_bs(bs);
3157         if (bs == NULL) {
3158             ret = 0;
3159             break;
3160         }
3161 
3162         depth++;
3163     }
3164 
3165     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
3166 
3167     if (file && has_offset) {
3168         bdrv_graph_rdlock_main_loop();
3169         bdrv_refresh_filename(file);
3170         bdrv_graph_rdunlock_main_loop();
3171         filename = file->filename;
3172     }
3173 
3174     *e = (MapEntry) {
3175         .start = offset,
3176         .length = bytes,
3177         .data = !!(ret & BDRV_BLOCK_DATA),
3178         .zero = !!(ret & BDRV_BLOCK_ZERO),
3179         .compressed = !!(ret & BDRV_BLOCK_COMPRESSED),
3180         .offset = map,
3181         .has_offset = has_offset,
3182         .depth = depth,
3183         .present = !!(ret & BDRV_BLOCK_ALLOCATED),
3184         .filename = filename,
3185     };
3186 
3187     return 0;
3188 }
3189 
3190 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
3191 {
3192     if (curr->length == 0) {
3193         return false;
3194     }
3195     if (curr->zero != next->zero ||
3196         curr->data != next->data ||
3197         curr->compressed != next->compressed ||
3198         curr->depth != next->depth ||
3199         curr->present != next->present ||
3200         !curr->filename != !next->filename ||
3201         curr->has_offset != next->has_offset) {
3202         return false;
3203     }
3204     if (curr->filename && strcmp(curr->filename, next->filename)) {
3205         return false;
3206     }
3207     if (curr->has_offset && curr->offset + curr->length != next->offset) {
3208         return false;
3209     }
3210     return true;
3211 }
3212 
3213 static int img_map(int argc, char **argv)
3214 {
3215     int c;
3216     OutputFormat output_format = OFORMAT_HUMAN;
3217     BlockBackend *blk;
3218     BlockDriverState *bs;
3219     const char *filename, *fmt, *output;
3220     int64_t length;
3221     MapEntry curr = { .length = 0 }, next;
3222     int ret = 0;
3223     bool image_opts = false;
3224     bool force_share = false;
3225     int64_t start_offset = 0;
3226     int64_t max_length = -1;
3227 
3228     fmt = NULL;
3229     output = NULL;
3230     for (;;) {
3231         int option_index = 0;
3232         static const struct option long_options[] = {
3233             {"help", no_argument, 0, 'h'},
3234             {"format", required_argument, 0, 'f'},
3235             {"output", required_argument, 0, OPTION_OUTPUT},
3236             {"object", required_argument, 0, OPTION_OBJECT},
3237             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3238             {"force-share", no_argument, 0, 'U'},
3239             {"start-offset", required_argument, 0, 's'},
3240             {"max-length", required_argument, 0, 'l'},
3241             {0, 0, 0, 0}
3242         };
3243         c = getopt_long(argc, argv, ":f:s:l:hU",
3244                         long_options, &option_index);
3245         if (c == -1) {
3246             break;
3247         }
3248         switch (c) {
3249         case ':':
3250             missing_argument(argv[optind - 1]);
3251             break;
3252         case '?':
3253             unrecognized_option(argv[optind - 1]);
3254             break;
3255         case 'h':
3256             help();
3257             break;
3258         case 'f':
3259             fmt = optarg;
3260             break;
3261         case 'U':
3262             force_share = true;
3263             break;
3264         case OPTION_OUTPUT:
3265             output = optarg;
3266             break;
3267         case 's':
3268             start_offset = cvtnum("start offset", optarg);
3269             if (start_offset < 0) {
3270                 return 1;
3271             }
3272             break;
3273         case 'l':
3274             max_length = cvtnum("max length", optarg);
3275             if (max_length < 0) {
3276                 return 1;
3277             }
3278             break;
3279         case OPTION_OBJECT:
3280             user_creatable_process_cmdline(optarg);
3281             break;
3282         case OPTION_IMAGE_OPTS:
3283             image_opts = true;
3284             break;
3285         }
3286     }
3287     if (optind != argc - 1) {
3288         error_exit("Expecting one image file name");
3289     }
3290     filename = argv[optind];
3291 
3292     if (output && !strcmp(output, "json")) {
3293         output_format = OFORMAT_JSON;
3294     } else if (output && !strcmp(output, "human")) {
3295         output_format = OFORMAT_HUMAN;
3296     } else if (output) {
3297         error_report("--output must be used with human or json as argument.");
3298         return 1;
3299     }
3300 
3301     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3302     if (!blk) {
3303         return 1;
3304     }
3305     bs = blk_bs(blk);
3306 
3307     if (output_format == OFORMAT_HUMAN) {
3308         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3309     } else if (output_format == OFORMAT_JSON) {
3310         putchar('[');
3311     }
3312 
3313     length = blk_getlength(blk);
3314     if (length < 0) {
3315         error_report("Failed to get size for '%s'", filename);
3316         return 1;
3317     }
3318     if (max_length != -1) {
3319         length = MIN(start_offset + max_length, length);
3320     }
3321 
3322     curr.start = start_offset;
3323     while (curr.start + curr.length < length) {
3324         int64_t offset = curr.start + curr.length;
3325         int64_t n = length - offset;
3326 
3327         ret = get_block_status(bs, offset, n, &next);
3328         if (ret < 0) {
3329             error_report("Could not read file metadata: %s", strerror(-ret));
3330             goto out;
3331         }
3332 
3333         if (entry_mergeable(&curr, &next)) {
3334             curr.length += next.length;
3335             continue;
3336         }
3337 
3338         if (curr.length > 0) {
3339             ret = dump_map_entry(output_format, &curr, &next);
3340             if (ret < 0) {
3341                 goto out;
3342             }
3343         }
3344         curr = next;
3345     }
3346 
3347     ret = dump_map_entry(output_format, &curr, NULL);
3348     if (output_format == OFORMAT_JSON) {
3349         puts("]");
3350     }
3351 
3352 out:
3353     blk_unref(blk);
3354     return ret < 0;
3355 }
3356 
3357 #define SNAPSHOT_LIST   1
3358 #define SNAPSHOT_CREATE 2
3359 #define SNAPSHOT_APPLY  3
3360 #define SNAPSHOT_DELETE 4
3361 
3362 static int img_snapshot(int argc, char **argv)
3363 {
3364     BlockBackend *blk;
3365     BlockDriverState *bs;
3366     QEMUSnapshotInfo sn;
3367     char *filename, *snapshot_name = NULL;
3368     int c, ret = 0, bdrv_oflags;
3369     int action = 0;
3370     bool quiet = false;
3371     Error *err = NULL;
3372     bool image_opts = false;
3373     bool force_share = false;
3374     int64_t rt;
3375 
3376     bdrv_oflags = BDRV_O_RDWR;
3377     /* Parse commandline parameters */
3378     for(;;) {
3379         static const struct option long_options[] = {
3380             {"help", no_argument, 0, 'h'},
3381             {"object", required_argument, 0, OPTION_OBJECT},
3382             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3383             {"force-share", no_argument, 0, 'U'},
3384             {0, 0, 0, 0}
3385         };
3386         c = getopt_long(argc, argv, ":la:c:d:hqU",
3387                         long_options, NULL);
3388         if (c == -1) {
3389             break;
3390         }
3391         switch(c) {
3392         case ':':
3393             missing_argument(argv[optind - 1]);
3394             break;
3395         case '?':
3396             unrecognized_option(argv[optind - 1]);
3397             break;
3398         case 'h':
3399             help();
3400             return 0;
3401         case 'l':
3402             if (action) {
3403                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3404                 return 0;
3405             }
3406             action = SNAPSHOT_LIST;
3407             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
3408             break;
3409         case 'a':
3410             if (action) {
3411                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3412                 return 0;
3413             }
3414             action = SNAPSHOT_APPLY;
3415             snapshot_name = optarg;
3416             break;
3417         case 'c':
3418             if (action) {
3419                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3420                 return 0;
3421             }
3422             action = SNAPSHOT_CREATE;
3423             snapshot_name = optarg;
3424             break;
3425         case 'd':
3426             if (action) {
3427                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3428                 return 0;
3429             }
3430             action = SNAPSHOT_DELETE;
3431             snapshot_name = optarg;
3432             break;
3433         case 'q':
3434             quiet = true;
3435             break;
3436         case 'U':
3437             force_share = true;
3438             break;
3439         case OPTION_OBJECT:
3440             user_creatable_process_cmdline(optarg);
3441             break;
3442         case OPTION_IMAGE_OPTS:
3443             image_opts = true;
3444             break;
3445         }
3446     }
3447 
3448     if (optind != argc - 1) {
3449         error_exit("Expecting one image file name");
3450     }
3451     filename = argv[optind++];
3452 
3453     /* Open the image */
3454     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
3455                    force_share);
3456     if (!blk) {
3457         return 1;
3458     }
3459     bs = blk_bs(blk);
3460 
3461     /* Perform the requested action */
3462     switch(action) {
3463     case SNAPSHOT_LIST:
3464         dump_snapshots(bs);
3465         break;
3466 
3467     case SNAPSHOT_CREATE:
3468         memset(&sn, 0, sizeof(sn));
3469         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3470 
3471         rt = g_get_real_time();
3472         sn.date_sec = rt / G_USEC_PER_SEC;
3473         sn.date_nsec = (rt % G_USEC_PER_SEC) * 1000;
3474 
3475         bdrv_graph_rdlock_main_loop();
3476         ret = bdrv_snapshot_create(bs, &sn);
3477         bdrv_graph_rdunlock_main_loop();
3478 
3479         if (ret) {
3480             error_report("Could not create snapshot '%s': %s",
3481                 snapshot_name, strerror(-ret));
3482         }
3483         break;
3484 
3485     case SNAPSHOT_APPLY:
3486         ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3487         if (ret) {
3488             error_reportf_err(err, "Could not apply snapshot '%s': ",
3489                               snapshot_name);
3490         }
3491         break;
3492 
3493     case SNAPSHOT_DELETE:
3494         bdrv_graph_rdlock_main_loop();
3495         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3496         if (ret < 0) {
3497             error_report("Could not delete snapshot '%s': snapshot not "
3498                          "found", snapshot_name);
3499             ret = 1;
3500         } else {
3501             ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3502             if (ret < 0) {
3503                 error_reportf_err(err, "Could not delete snapshot '%s': ",
3504                                   snapshot_name);
3505                 ret = 1;
3506             }
3507         }
3508         bdrv_graph_rdunlock_main_loop();
3509         break;
3510     }
3511 
3512     /* Cleanup */
3513     blk_unref(blk);
3514     if (ret) {
3515         return 1;
3516     }
3517     return 0;
3518 }
3519 
3520 static int img_rebase(int argc, char **argv)
3521 {
3522     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3523     uint8_t *buf_old = NULL;
3524     uint8_t *buf_new = NULL;
3525     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3526     BlockDriverState *unfiltered_bs;
3527     char *filename;
3528     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3529     int c, flags, src_flags, ret;
3530     bool writethrough, src_writethrough;
3531     int unsafe = 0;
3532     bool force_share = false;
3533     int progress = 0;
3534     bool quiet = false;
3535     Error *local_err = NULL;
3536     bool image_opts = false;
3537 
3538     /* Parse commandline parameters */
3539     fmt = NULL;
3540     cache = BDRV_DEFAULT_CACHE;
3541     src_cache = BDRV_DEFAULT_CACHE;
3542     out_baseimg = NULL;
3543     out_basefmt = NULL;
3544     for(;;) {
3545         static const struct option long_options[] = {
3546             {"help", no_argument, 0, 'h'},
3547             {"object", required_argument, 0, OPTION_OBJECT},
3548             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3549             {"force-share", no_argument, 0, 'U'},
3550             {0, 0, 0, 0}
3551         };
3552         c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
3553                         long_options, NULL);
3554         if (c == -1) {
3555             break;
3556         }
3557         switch(c) {
3558         case ':':
3559             missing_argument(argv[optind - 1]);
3560             break;
3561         case '?':
3562             unrecognized_option(argv[optind - 1]);
3563             break;
3564         case 'h':
3565             help();
3566             return 0;
3567         case 'f':
3568             fmt = optarg;
3569             break;
3570         case 'F':
3571             out_basefmt = optarg;
3572             break;
3573         case 'b':
3574             out_baseimg = optarg;
3575             break;
3576         case 'u':
3577             unsafe = 1;
3578             break;
3579         case 'p':
3580             progress = 1;
3581             break;
3582         case 't':
3583             cache = optarg;
3584             break;
3585         case 'T':
3586             src_cache = optarg;
3587             break;
3588         case 'q':
3589             quiet = true;
3590             break;
3591         case OPTION_OBJECT:
3592             user_creatable_process_cmdline(optarg);
3593             break;
3594         case OPTION_IMAGE_OPTS:
3595             image_opts = true;
3596             break;
3597         case 'U':
3598             force_share = true;
3599             break;
3600         }
3601     }
3602 
3603     if (quiet) {
3604         progress = 0;
3605     }
3606 
3607     if (optind != argc - 1) {
3608         error_exit("Expecting one image file name");
3609     }
3610     if (!unsafe && !out_baseimg) {
3611         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3612     }
3613     filename = argv[optind++];
3614 
3615     qemu_progress_init(progress, 2.0);
3616     qemu_progress_print(0, 100);
3617 
3618     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3619     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3620     if (ret < 0) {
3621         error_report("Invalid cache option: %s", cache);
3622         goto out;
3623     }
3624 
3625     src_flags = 0;
3626     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3627     if (ret < 0) {
3628         error_report("Invalid source cache option: %s", src_cache);
3629         goto out;
3630     }
3631 
3632     /* The source files are opened read-only, don't care about WCE */
3633     assert((src_flags & BDRV_O_RDWR) == 0);
3634     (void) src_writethrough;
3635 
3636     /*
3637      * Open the images.
3638      *
3639      * Ignore the old backing file for unsafe rebase in case we want to correct
3640      * the reference to a renamed or moved backing file.
3641      */
3642     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3643                    false);
3644     if (!blk) {
3645         ret = -1;
3646         goto out;
3647     }
3648     bs = blk_bs(blk);
3649 
3650     unfiltered_bs = bdrv_skip_filters(bs);
3651 
3652     if (out_basefmt != NULL) {
3653         if (bdrv_find_format(out_basefmt) == NULL) {
3654             error_report("Invalid format name: '%s'", out_basefmt);
3655             ret = -1;
3656             goto out;
3657         }
3658     }
3659 
3660     /* For safe rebasing we need to compare old and new backing file */
3661     if (!unsafe) {
3662         QDict *options = NULL;
3663         BlockDriverState *base_bs = bdrv_cow_bs(unfiltered_bs);
3664 
3665         if (base_bs) {
3666             blk_old_backing = blk_new(qemu_get_aio_context(),
3667                                       BLK_PERM_CONSISTENT_READ,
3668                                       BLK_PERM_ALL);
3669             ret = blk_insert_bs(blk_old_backing, base_bs,
3670                                 &local_err);
3671             if (ret < 0) {
3672                 error_reportf_err(local_err,
3673                                   "Could not reuse old backing file '%s': ",
3674                                   base_bs->filename);
3675                 goto out;
3676             }
3677         } else {
3678             blk_old_backing = NULL;
3679         }
3680 
3681         if (out_baseimg[0]) {
3682             const char *overlay_filename;
3683             char *out_real_path;
3684 
3685             options = qdict_new();
3686             if (out_basefmt) {
3687                 qdict_put_str(options, "driver", out_basefmt);
3688             }
3689             if (force_share) {
3690                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3691             }
3692 
3693             bdrv_graph_rdlock_main_loop();
3694             bdrv_refresh_filename(bs);
3695             bdrv_graph_rdunlock_main_loop();
3696             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3697                                                      : bs->filename;
3698             out_real_path =
3699                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
3700                                                              out_baseimg,
3701                                                              &local_err);
3702             if (local_err) {
3703                 qobject_unref(options);
3704                 error_reportf_err(local_err,
3705                                   "Could not resolve backing filename: ");
3706                 ret = -1;
3707                 goto out;
3708             }
3709 
3710             /*
3711              * Find out whether we rebase an image on top of a previous image
3712              * in its chain.
3713              */
3714             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3715             if (prefix_chain_bs) {
3716                 qobject_unref(options);
3717                 g_free(out_real_path);
3718 
3719                 blk_new_backing = blk_new(qemu_get_aio_context(),
3720                                           BLK_PERM_CONSISTENT_READ,
3721                                           BLK_PERM_ALL);
3722                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3723                                     &local_err);
3724                 if (ret < 0) {
3725                     error_reportf_err(local_err,
3726                                       "Could not reuse backing file '%s': ",
3727                                       out_baseimg);
3728                     goto out;
3729                 }
3730             } else {
3731                 blk_new_backing = blk_new_open(out_real_path, NULL,
3732                                                options, src_flags, &local_err);
3733                 g_free(out_real_path);
3734                 if (!blk_new_backing) {
3735                     error_reportf_err(local_err,
3736                                       "Could not open new backing file '%s': ",
3737                                       out_baseimg);
3738                     ret = -1;
3739                     goto out;
3740                 }
3741             }
3742         }
3743     }
3744 
3745     /*
3746      * Check each unallocated cluster in the COW file. If it is unallocated,
3747      * accesses go to the backing file. We must therefore compare this cluster
3748      * in the old and new backing file, and if they differ we need to copy it
3749      * from the old backing file into the COW file.
3750      *
3751      * If qemu-img crashes during this step, no harm is done. The content of
3752      * the image is the same as the original one at any time.
3753      */
3754     if (!unsafe) {
3755         int64_t size;
3756         int64_t old_backing_size = 0;
3757         int64_t new_backing_size = 0;
3758         uint64_t offset;
3759         int64_t n;
3760         float local_progress = 0;
3761 
3762         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3763         buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3764 
3765         size = blk_getlength(blk);
3766         if (size < 0) {
3767             error_report("Could not get size of '%s': %s",
3768                          filename, strerror(-size));
3769             ret = -1;
3770             goto out;
3771         }
3772         if (blk_old_backing) {
3773             old_backing_size = blk_getlength(blk_old_backing);
3774             if (old_backing_size < 0) {
3775                 char backing_name[PATH_MAX];
3776 
3777                 bdrv_get_backing_filename(bs, backing_name,
3778                                           sizeof(backing_name));
3779                 error_report("Could not get size of '%s': %s",
3780                              backing_name, strerror(-old_backing_size));
3781                 ret = -1;
3782                 goto out;
3783             }
3784         }
3785         if (blk_new_backing) {
3786             new_backing_size = blk_getlength(blk_new_backing);
3787             if (new_backing_size < 0) {
3788                 error_report("Could not get size of '%s': %s",
3789                              out_baseimg, strerror(-new_backing_size));
3790                 ret = -1;
3791                 goto out;
3792             }
3793         }
3794 
3795         if (size != 0) {
3796             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3797         }
3798 
3799         for (offset = 0; offset < size; offset += n) {
3800             bool buf_old_is_zero = false;
3801 
3802             /* How many bytes can we handle with the next read? */
3803             n = MIN(IO_BUF_SIZE, size - offset);
3804 
3805             /* If the cluster is allocated, we don't need to take action */
3806             ret = bdrv_is_allocated(unfiltered_bs, offset, n, &n);
3807             if (ret < 0) {
3808                 error_report("error while reading image metadata: %s",
3809                              strerror(-ret));
3810                 goto out;
3811             }
3812             if (ret) {
3813                 continue;
3814             }
3815 
3816             if (prefix_chain_bs) {
3817                 /*
3818                  * If cluster wasn't changed since prefix_chain, we don't need
3819                  * to take action
3820                  */
3821                 ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
3822                                               prefix_chain_bs, false,
3823                                               offset, n, &n);
3824                 if (ret < 0) {
3825                     error_report("error while reading image metadata: %s",
3826                                  strerror(-ret));
3827                     goto out;
3828                 }
3829                 if (!ret) {
3830                     continue;
3831                 }
3832             }
3833 
3834             /*
3835              * Read old and new backing file and take into consideration that
3836              * backing files may be smaller than the COW image.
3837              */
3838             if (offset >= old_backing_size) {
3839                 memset(buf_old, 0, n);
3840                 buf_old_is_zero = true;
3841             } else {
3842                 if (offset + n > old_backing_size) {
3843                     n = old_backing_size - offset;
3844                 }
3845 
3846                 ret = blk_pread(blk_old_backing, offset, n, buf_old, 0);
3847                 if (ret < 0) {
3848                     error_report("error while reading from old backing file");
3849                     goto out;
3850                 }
3851             }
3852 
3853             if (offset >= new_backing_size || !blk_new_backing) {
3854                 memset(buf_new, 0, n);
3855             } else {
3856                 if (offset + n > new_backing_size) {
3857                     n = new_backing_size - offset;
3858                 }
3859 
3860                 ret = blk_pread(blk_new_backing, offset, n, buf_new, 0);
3861                 if (ret < 0) {
3862                     error_report("error while reading from new backing file");
3863                     goto out;
3864                 }
3865             }
3866 
3867             /* If they differ, we need to write to the COW file */
3868             uint64_t written = 0;
3869 
3870             while (written < n) {
3871                 int64_t pnum;
3872 
3873                 if (compare_buffers(buf_old + written, buf_new + written,
3874                                     n - written, &pnum))
3875                 {
3876                     if (buf_old_is_zero) {
3877                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
3878                     } else {
3879                         ret = blk_pwrite(blk, offset + written, pnum,
3880                                          buf_old + written, 0);
3881                     }
3882                     if (ret < 0) {
3883                         error_report("Error while writing to COW image: %s",
3884                             strerror(-ret));
3885                         goto out;
3886                     }
3887                 }
3888 
3889                 written += pnum;
3890             }
3891             qemu_progress_print(local_progress, 100);
3892         }
3893     }
3894 
3895     /*
3896      * Change the backing file. All clusters that are different from the old
3897      * backing file are overwritten in the COW file now, so the visible content
3898      * doesn't change when we switch the backing file.
3899      */
3900     if (out_baseimg && *out_baseimg) {
3901         ret = bdrv_change_backing_file(unfiltered_bs, out_baseimg, out_basefmt,
3902                                        true);
3903     } else {
3904         ret = bdrv_change_backing_file(unfiltered_bs, NULL, NULL, false);
3905     }
3906 
3907     if (ret == -ENOSPC) {
3908         error_report("Could not change the backing file to '%s': No "
3909                      "space left in the file header", out_baseimg);
3910     } else if (ret == -EINVAL && out_baseimg && !out_basefmt) {
3911         error_report("Could not change the backing file to '%s': backing "
3912                      "format must be specified", out_baseimg);
3913     } else if (ret < 0) {
3914         error_report("Could not change the backing file to '%s': %s",
3915             out_baseimg, strerror(-ret));
3916     }
3917 
3918     qemu_progress_print(100, 0);
3919     /*
3920      * TODO At this point it is possible to check if any clusters that are
3921      * allocated in the COW file are the same in the backing file. If so, they
3922      * could be dropped from the COW file. Don't do this before switching the
3923      * backing file, in case of a crash this would lead to corruption.
3924      */
3925 out:
3926     qemu_progress_end();
3927     /* Cleanup */
3928     if (!unsafe) {
3929         blk_unref(blk_old_backing);
3930         blk_unref(blk_new_backing);
3931     }
3932     qemu_vfree(buf_old);
3933     qemu_vfree(buf_new);
3934 
3935     blk_unref(blk);
3936     if (ret) {
3937         return 1;
3938     }
3939     return 0;
3940 }
3941 
3942 static int img_resize(int argc, char **argv)
3943 {
3944     Error *err = NULL;
3945     int c, ret, relative;
3946     const char *filename, *fmt, *size;
3947     int64_t n, total_size, current_size;
3948     bool quiet = false;
3949     BlockBackend *blk = NULL;
3950     PreallocMode prealloc = PREALLOC_MODE_OFF;
3951     QemuOpts *param;
3952 
3953     static QemuOptsList resize_options = {
3954         .name = "resize_options",
3955         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3956         .desc = {
3957             {
3958                 .name = BLOCK_OPT_SIZE,
3959                 .type = QEMU_OPT_SIZE,
3960                 .help = "Virtual disk size"
3961             }, {
3962                 /* end of list */
3963             }
3964         },
3965     };
3966     bool image_opts = false;
3967     bool shrink = false;
3968 
3969     /* Remove size from argv manually so that negative numbers are not treated
3970      * as options by getopt. */
3971     if (argc < 3) {
3972         error_exit("Not enough arguments");
3973         return 1;
3974     }
3975 
3976     size = argv[--argc];
3977 
3978     /* Parse getopt arguments */
3979     fmt = NULL;
3980     for(;;) {
3981         static const struct option long_options[] = {
3982             {"help", no_argument, 0, 'h'},
3983             {"object", required_argument, 0, OPTION_OBJECT},
3984             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3985             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
3986             {"shrink", no_argument, 0, OPTION_SHRINK},
3987             {0, 0, 0, 0}
3988         };
3989         c = getopt_long(argc, argv, ":f:hq",
3990                         long_options, NULL);
3991         if (c == -1) {
3992             break;
3993         }
3994         switch(c) {
3995         case ':':
3996             missing_argument(argv[optind - 1]);
3997             break;
3998         case '?':
3999             unrecognized_option(argv[optind - 1]);
4000             break;
4001         case 'h':
4002             help();
4003             break;
4004         case 'f':
4005             fmt = optarg;
4006             break;
4007         case 'q':
4008             quiet = true;
4009             break;
4010         case OPTION_OBJECT:
4011             user_creatable_process_cmdline(optarg);
4012             break;
4013         case OPTION_IMAGE_OPTS:
4014             image_opts = true;
4015             break;
4016         case OPTION_PREALLOCATION:
4017             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
4018                                        PREALLOC_MODE__MAX, NULL);
4019             if (prealloc == PREALLOC_MODE__MAX) {
4020                 error_report("Invalid preallocation mode '%s'", optarg);
4021                 return 1;
4022             }
4023             break;
4024         case OPTION_SHRINK:
4025             shrink = true;
4026             break;
4027         }
4028     }
4029     if (optind != argc - 1) {
4030         error_exit("Expecting image file name and size");
4031     }
4032     filename = argv[optind++];
4033 
4034     /* Choose grow, shrink, or absolute resize mode */
4035     switch (size[0]) {
4036     case '+':
4037         relative = 1;
4038         size++;
4039         break;
4040     case '-':
4041         relative = -1;
4042         size++;
4043         break;
4044     default:
4045         relative = 0;
4046         break;
4047     }
4048 
4049     /* Parse size */
4050     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
4051     if (!qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err)) {
4052         error_report_err(err);
4053         ret = -1;
4054         qemu_opts_del(param);
4055         goto out;
4056     }
4057     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
4058     qemu_opts_del(param);
4059 
4060     blk = img_open(image_opts, filename, fmt,
4061                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
4062                    false);
4063     if (!blk) {
4064         ret = -1;
4065         goto out;
4066     }
4067 
4068     current_size = blk_getlength(blk);
4069     if (current_size < 0) {
4070         error_report("Failed to inquire current image length: %s",
4071                      strerror(-current_size));
4072         ret = -1;
4073         goto out;
4074     }
4075 
4076     if (relative) {
4077         total_size = current_size + n * relative;
4078     } else {
4079         total_size = n;
4080     }
4081     if (total_size <= 0) {
4082         error_report("New image size must be positive");
4083         ret = -1;
4084         goto out;
4085     }
4086 
4087     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
4088         error_report("Preallocation can only be used for growing images");
4089         ret = -1;
4090         goto out;
4091     }
4092 
4093     if (total_size < current_size && !shrink) {
4094         error_report("Use the --shrink option to perform a shrink operation.");
4095         warn_report("Shrinking an image will delete all data beyond the "
4096                     "shrunken image's end. Before performing such an "
4097                     "operation, make sure there is no important data there.");
4098         ret = -1;
4099         goto out;
4100     }
4101 
4102     /*
4103      * The user expects the image to have the desired size after
4104      * resizing, so pass @exact=true.  It is of no use to report
4105      * success when the image has not actually been resized.
4106      */
4107     ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
4108     if (!ret) {
4109         qprintf(quiet, "Image resized.\n");
4110     } else {
4111         error_report_err(err);
4112     }
4113 out:
4114     blk_unref(blk);
4115     if (ret) {
4116         return 1;
4117     }
4118     return 0;
4119 }
4120 
4121 static void amend_status_cb(BlockDriverState *bs,
4122                             int64_t offset, int64_t total_work_size,
4123                             void *opaque)
4124 {
4125     qemu_progress_print(100.f * offset / total_work_size, 0);
4126 }
4127 
4128 static int print_amend_option_help(const char *format)
4129 {
4130     BlockDriver *drv;
4131 
4132     GRAPH_RDLOCK_GUARD_MAINLOOP();
4133 
4134     /* Find driver and parse its options */
4135     drv = bdrv_find_format(format);
4136     if (!drv) {
4137         error_report("Unknown file format '%s'", format);
4138         return 1;
4139     }
4140 
4141     if (!drv->bdrv_amend_options) {
4142         error_report("Format driver '%s' does not support option amendment",
4143                      format);
4144         return 1;
4145     }
4146 
4147     /* Every driver supporting amendment must have amend_opts */
4148     assert(drv->amend_opts);
4149 
4150     printf("Amend options for '%s':\n", format);
4151     qemu_opts_print_help(drv->amend_opts, false);
4152     return 0;
4153 }
4154 
4155 static int img_amend(int argc, char **argv)
4156 {
4157     Error *err = NULL;
4158     int c, ret = 0;
4159     char *options = NULL;
4160     QemuOptsList *amend_opts = NULL;
4161     QemuOpts *opts = NULL;
4162     const char *fmt = NULL, *filename, *cache;
4163     int flags;
4164     bool writethrough;
4165     bool quiet = false, progress = false;
4166     BlockBackend *blk = NULL;
4167     BlockDriverState *bs = NULL;
4168     bool image_opts = false;
4169     bool force = false;
4170 
4171     cache = BDRV_DEFAULT_CACHE;
4172     for (;;) {
4173         static const struct option long_options[] = {
4174             {"help", no_argument, 0, 'h'},
4175             {"object", required_argument, 0, OPTION_OBJECT},
4176             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4177             {"force", no_argument, 0, OPTION_FORCE},
4178             {0, 0, 0, 0}
4179         };
4180         c = getopt_long(argc, argv, ":ho:f:t:pq",
4181                         long_options, NULL);
4182         if (c == -1) {
4183             break;
4184         }
4185 
4186         switch (c) {
4187         case ':':
4188             missing_argument(argv[optind - 1]);
4189             break;
4190         case '?':
4191             unrecognized_option(argv[optind - 1]);
4192             break;
4193         case 'h':
4194             help();
4195             break;
4196         case 'o':
4197             if (accumulate_options(&options, optarg) < 0) {
4198                 ret = -1;
4199                 goto out_no_progress;
4200             }
4201             break;
4202         case 'f':
4203             fmt = optarg;
4204             break;
4205         case 't':
4206             cache = optarg;
4207             break;
4208         case 'p':
4209             progress = true;
4210             break;
4211         case 'q':
4212             quiet = true;
4213             break;
4214         case OPTION_OBJECT:
4215             user_creatable_process_cmdline(optarg);
4216             break;
4217         case OPTION_IMAGE_OPTS:
4218             image_opts = true;
4219             break;
4220         case OPTION_FORCE:
4221             force = true;
4222             break;
4223         }
4224     }
4225 
4226     if (!options) {
4227         error_exit("Must specify options (-o)");
4228     }
4229 
4230     if (quiet) {
4231         progress = false;
4232     }
4233     qemu_progress_init(progress, 1.0);
4234 
4235     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4236     if (fmt && has_help_option(options)) {
4237         /* If a format is explicitly specified (and possibly no filename is
4238          * given), print option help here */
4239         ret = print_amend_option_help(fmt);
4240         goto out;
4241     }
4242 
4243     if (optind != argc - 1) {
4244         error_report("Expecting one image file name");
4245         ret = -1;
4246         goto out;
4247     }
4248 
4249     flags = BDRV_O_RDWR;
4250     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4251     if (ret < 0) {
4252         error_report("Invalid cache option: %s", cache);
4253         goto out;
4254     }
4255 
4256     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4257                    false);
4258     if (!blk) {
4259         ret = -1;
4260         goto out;
4261     }
4262     bs = blk_bs(blk);
4263 
4264     fmt = bs->drv->format_name;
4265 
4266     if (has_help_option(options)) {
4267         /* If the format was auto-detected, print option help here */
4268         ret = print_amend_option_help(fmt);
4269         goto out;
4270     }
4271 
4272     bdrv_graph_rdlock_main_loop();
4273     if (!bs->drv->bdrv_amend_options) {
4274         error_report("Format driver '%s' does not support option amendment",
4275                      fmt);
4276         bdrv_graph_rdunlock_main_loop();
4277         ret = -1;
4278         goto out;
4279     }
4280 
4281     /* Every driver supporting amendment must have amend_opts */
4282     assert(bs->drv->amend_opts);
4283 
4284     amend_opts = qemu_opts_append(amend_opts, bs->drv->amend_opts);
4285     opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4286     if (!qemu_opts_do_parse(opts, options, NULL, &err)) {
4287         /* Try to parse options using the create options */
4288         amend_opts = qemu_opts_append(amend_opts, bs->drv->create_opts);
4289         qemu_opts_del(opts);
4290         opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4291         if (qemu_opts_do_parse(opts, options, NULL, NULL)) {
4292             error_append_hint(&err,
4293                               "This option is only supported for image creation\n");
4294         }
4295 
4296         bdrv_graph_rdunlock_main_loop();
4297         error_report_err(err);
4298         ret = -1;
4299         goto out;
4300     }
4301 
4302     /* In case the driver does not call amend_status_cb() */
4303     qemu_progress_print(0.f, 0);
4304     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, force, &err);
4305     qemu_progress_print(100.f, 0);
4306     bdrv_graph_rdunlock_main_loop();
4307 
4308     if (ret < 0) {
4309         error_report_err(err);
4310         goto out;
4311     }
4312 
4313 out:
4314     qemu_progress_end();
4315 
4316 out_no_progress:
4317     blk_unref(blk);
4318     qemu_opts_del(opts);
4319     qemu_opts_free(amend_opts);
4320     g_free(options);
4321 
4322     if (ret) {
4323         return 1;
4324     }
4325     return 0;
4326 }
4327 
4328 typedef struct BenchData {
4329     BlockBackend *blk;
4330     uint64_t image_size;
4331     bool write;
4332     int bufsize;
4333     int step;
4334     int nrreq;
4335     int n;
4336     int flush_interval;
4337     bool drain_on_flush;
4338     uint8_t *buf;
4339     QEMUIOVector *qiov;
4340 
4341     int in_flight;
4342     bool in_flush;
4343     uint64_t offset;
4344 } BenchData;
4345 
/*
 * Completion callback for flushes that bench_cb() issues without draining
 * the request queue.  A failed flush ends the program; a successful one
 * needs no further action.  @opaque is unused.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4353 
4354 static void bench_cb(void *opaque, int ret)
4355 {
4356     BenchData *b = opaque;
4357     BlockAIOCB *acb;
4358 
4359     if (ret < 0) {
4360         error_report("Failed request: %s", strerror(-ret));
4361         exit(EXIT_FAILURE);
4362     }
4363 
4364     if (b->in_flush) {
4365         /* Just finished a flush with drained queue: Start next requests */
4366         assert(b->in_flight == 0);
4367         b->in_flush = false;
4368     } else if (b->in_flight > 0) {
4369         int remaining = b->n - b->in_flight;
4370 
4371         b->n--;
4372         b->in_flight--;
4373 
4374         /* Time for flush? Drain queue if requested, then flush */
4375         if (b->flush_interval && remaining % b->flush_interval == 0) {
4376             if (!b->in_flight || !b->drain_on_flush) {
4377                 BlockCompletionFunc *cb;
4378 
4379                 if (b->drain_on_flush) {
4380                     b->in_flush = true;
4381                     cb = bench_cb;
4382                 } else {
4383                     cb = bench_undrained_flush_cb;
4384                 }
4385 
4386                 acb = blk_aio_flush(b->blk, cb, b);
4387                 if (!acb) {
4388                     error_report("Failed to issue flush request");
4389                     exit(EXIT_FAILURE);
4390                 }
4391             }
4392             if (b->drain_on_flush) {
4393                 return;
4394             }
4395         }
4396     }
4397 
4398     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4399         int64_t offset = b->offset;
4400         /* blk_aio_* might look for completed I/Os and kick bench_cb
4401          * again, so make sure this operation is counted by in_flight
4402          * and b->offset is ready for the next submission.
4403          */
4404         b->in_flight++;
4405         b->offset += b->step;
4406         b->offset %= b->image_size;
4407         if (b->write) {
4408             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4409         } else {
4410             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4411         }
4412         if (!acb) {
4413             error_report("Failed to issue request");
4414             exit(EXIT_FAILURE);
4415         }
4416     }
4417 }
4418 
4419 static int img_bench(int argc, char **argv)
4420 {
4421     int c, ret = 0;
4422     const char *fmt = NULL, *filename;
4423     bool quiet = false;
4424     bool image_opts = false;
4425     bool is_write = false;
4426     int count = 75000;
4427     int depth = 64;
4428     int64_t offset = 0;
4429     size_t bufsize = 4096;
4430     int pattern = 0;
4431     size_t step = 0;
4432     int flush_interval = 0;
4433     bool drain_on_flush = true;
4434     int64_t image_size;
4435     BlockBackend *blk = NULL;
4436     BenchData data = {};
4437     int flags = 0;
4438     bool writethrough = false;
4439     struct timeval t1, t2;
4440     int i;
4441     bool force_share = false;
4442     size_t buf_size = 0;
4443 
4444     for (;;) {
4445         static const struct option long_options[] = {
4446             {"help", no_argument, 0, 'h'},
4447             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4448             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4449             {"pattern", required_argument, 0, OPTION_PATTERN},
4450             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4451             {"force-share", no_argument, 0, 'U'},
4452             {0, 0, 0, 0}
4453         };
4454         c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
4455                         NULL);
4456         if (c == -1) {
4457             break;
4458         }
4459 
4460         switch (c) {
4461         case ':':
4462             missing_argument(argv[optind - 1]);
4463             break;
4464         case '?':
4465             unrecognized_option(argv[optind - 1]);
4466             break;
4467         case 'h':
4468             help();
4469             break;
4470         case 'c':
4471         {
4472             unsigned long res;
4473 
4474             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4475                 error_report("Invalid request count specified");
4476                 return 1;
4477             }
4478             count = res;
4479             break;
4480         }
4481         case 'd':
4482         {
4483             unsigned long res;
4484 
4485             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4486                 error_report("Invalid queue depth specified");
4487                 return 1;
4488             }
4489             depth = res;
4490             break;
4491         }
4492         case 'f':
4493             fmt = optarg;
4494             break;
4495         case 'n':
4496             flags |= BDRV_O_NATIVE_AIO;
4497             break;
4498         case 'i':
4499             ret = bdrv_parse_aio(optarg, &flags);
4500             if (ret < 0) {
4501                 error_report("Invalid aio option: %s", optarg);
4502                 ret = -1;
4503                 goto out;
4504             }
4505             break;
4506         case 'o':
4507         {
4508             offset = cvtnum("offset", optarg);
4509             if (offset < 0) {
4510                 return 1;
4511             }
4512             break;
4513         }
4514             break;
4515         case 'q':
4516             quiet = true;
4517             break;
4518         case 's':
4519         {
4520             int64_t sval;
4521 
4522             sval = cvtnum_full("buffer size", optarg, 0, INT_MAX);
4523             if (sval < 0) {
4524                 return 1;
4525             }
4526 
4527             bufsize = sval;
4528             break;
4529         }
4530         case 'S':
4531         {
4532             int64_t sval;
4533 
4534             sval = cvtnum_full("step_size", optarg, 0, INT_MAX);
4535             if (sval < 0) {
4536                 return 1;
4537             }
4538 
4539             step = sval;
4540             break;
4541         }
4542         case 't':
4543             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4544             if (ret < 0) {
4545                 error_report("Invalid cache mode");
4546                 ret = -1;
4547                 goto out;
4548             }
4549             break;
4550         case 'w':
4551             flags |= BDRV_O_RDWR;
4552             is_write = true;
4553             break;
4554         case 'U':
4555             force_share = true;
4556             break;
4557         case OPTION_PATTERN:
4558         {
4559             unsigned long res;
4560 
4561             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4562                 error_report("Invalid pattern byte specified");
4563                 return 1;
4564             }
4565             pattern = res;
4566             break;
4567         }
4568         case OPTION_FLUSH_INTERVAL:
4569         {
4570             unsigned long res;
4571 
4572             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4573                 error_report("Invalid flush interval specified");
4574                 return 1;
4575             }
4576             flush_interval = res;
4577             break;
4578         }
4579         case OPTION_NO_DRAIN:
4580             drain_on_flush = false;
4581             break;
4582         case OPTION_IMAGE_OPTS:
4583             image_opts = true;
4584             break;
4585         }
4586     }
4587 
4588     if (optind != argc - 1) {
4589         error_exit("Expecting one image file name");
4590     }
4591     filename = argv[argc - 1];
4592 
4593     if (!is_write && flush_interval) {
4594         error_report("--flush-interval is only available in write tests");
4595         ret = -1;
4596         goto out;
4597     }
4598     if (flush_interval && flush_interval < depth) {
4599         error_report("Flush interval can't be smaller than depth");
4600         ret = -1;
4601         goto out;
4602     }
4603 
4604     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4605                    force_share);
4606     if (!blk) {
4607         ret = -1;
4608         goto out;
4609     }
4610 
4611     image_size = blk_getlength(blk);
4612     if (image_size < 0) {
4613         ret = image_size;
4614         goto out;
4615     }
4616 
4617     data = (BenchData) {
4618         .blk            = blk,
4619         .image_size     = image_size,
4620         .bufsize        = bufsize,
4621         .step           = step ?: bufsize,
4622         .nrreq          = depth,
4623         .n              = count,
4624         .offset         = offset,
4625         .write          = is_write,
4626         .flush_interval = flush_interval,
4627         .drain_on_flush = drain_on_flush,
4628     };
4629     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4630            "(starting at offset %" PRId64 ", step size %d)\n",
4631            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4632            data.offset, data.step);
4633     if (flush_interval) {
4634         printf("Sending flush every %d requests\n", flush_interval);
4635     }
4636 
4637     buf_size = data.nrreq * data.bufsize;
4638     data.buf = blk_blockalign(blk, buf_size);
4639     memset(data.buf, pattern, data.nrreq * data.bufsize);
4640 
4641     blk_register_buf(blk, data.buf, buf_size, &error_fatal);
4642 
4643     data.qiov = g_new(QEMUIOVector, data.nrreq);
4644     for (i = 0; i < data.nrreq; i++) {
4645         qemu_iovec_init(&data.qiov[i], 1);
4646         qemu_iovec_add(&data.qiov[i],
4647                        data.buf + i * data.bufsize, data.bufsize);
4648     }
4649 
4650     gettimeofday(&t1, NULL);
4651     bench_cb(&data, 0);
4652 
4653     while (data.n > 0) {
4654         main_loop_wait(false);
4655     }
4656     gettimeofday(&t2, NULL);
4657 
4658     printf("Run completed in %3.3f seconds.\n",
4659            (t2.tv_sec - t1.tv_sec)
4660            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4661 
4662 out:
4663     if (data.buf) {
4664         blk_unregister_buf(blk, data.buf, buf_size);
4665     }
4666     qemu_vfree(data.buf);
4667     blk_unref(blk);
4668 
4669     if (ret) {
4670         return 1;
4671     }
4672     return 0;
4673 }
4674 
/*
 * Operations that "qemu-img bitmap" can queue from its command line.  Each
 * value corresponds to the action option named in the trailing comment.
 */
enum ImgBitmapAct {
    BITMAP_ADD,     /* --add */
    BITMAP_REMOVE,  /* --remove */
    BITMAP_CLEAR,   /* --clear */
    BITMAP_ENABLE,  /* --enable */
    BITMAP_DISABLE, /* --disable */
    BITMAP_MERGE,   /* --merge SOURCE */
};
/*
 * One queued bitmap operation.  img_bitmap() appends an entry to the tail of
 * its action queue for every action option it parses and later walks the
 * queue from the head, so actions run in command-line order.
 */
typedef struct ImgBitmapAction {
    enum ImgBitmapAct act;
    /* Points at the --merge option argument (not a copy). */
    const char *src; /* only used for merge */
    QSIMPLEQ_ENTRY(ImgBitmapAction) next;
} ImgBitmapAction;
4688 
4689 static int img_bitmap(int argc, char **argv)
4690 {
4691     Error *err = NULL;
4692     int c, ret = 1;
4693     QemuOpts *opts = NULL;
4694     const char *fmt = NULL, *src_fmt = NULL, *src_filename = NULL;
4695     const char *filename, *bitmap;
4696     BlockBackend *blk = NULL, *src = NULL;
4697     BlockDriverState *bs = NULL, *src_bs = NULL;
4698     bool image_opts = false;
4699     int64_t granularity = 0;
4700     bool add = false, merge = false;
4701     QSIMPLEQ_HEAD(, ImgBitmapAction) actions;
4702     ImgBitmapAction *act, *act_next;
4703     const char *op;
4704     int inactivate_ret;
4705 
4706     QSIMPLEQ_INIT(&actions);
4707 
4708     for (;;) {
4709         static const struct option long_options[] = {
4710             {"help", no_argument, 0, 'h'},
4711             {"object", required_argument, 0, OPTION_OBJECT},
4712             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4713             {"add", no_argument, 0, OPTION_ADD},
4714             {"remove", no_argument, 0, OPTION_REMOVE},
4715             {"clear", no_argument, 0, OPTION_CLEAR},
4716             {"enable", no_argument, 0, OPTION_ENABLE},
4717             {"disable", no_argument, 0, OPTION_DISABLE},
4718             {"merge", required_argument, 0, OPTION_MERGE},
4719             {"granularity", required_argument, 0, 'g'},
4720             {"source-file", required_argument, 0, 'b'},
4721             {"source-format", required_argument, 0, 'F'},
4722             {0, 0, 0, 0}
4723         };
4724         c = getopt_long(argc, argv, ":b:f:F:g:h", long_options, NULL);
4725         if (c == -1) {
4726             break;
4727         }
4728 
4729         switch (c) {
4730         case ':':
4731             missing_argument(argv[optind - 1]);
4732             break;
4733         case '?':
4734             unrecognized_option(argv[optind - 1]);
4735             break;
4736         case 'h':
4737             help();
4738             break;
4739         case 'b':
4740             src_filename = optarg;
4741             break;
4742         case 'f':
4743             fmt = optarg;
4744             break;
4745         case 'F':
4746             src_fmt = optarg;
4747             break;
4748         case 'g':
4749             granularity = cvtnum("granularity", optarg);
4750             if (granularity < 0) {
4751                 return 1;
4752             }
4753             break;
4754         case OPTION_ADD:
4755             act = g_new0(ImgBitmapAction, 1);
4756             act->act = BITMAP_ADD;
4757             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4758             add = true;
4759             break;
4760         case OPTION_REMOVE:
4761             act = g_new0(ImgBitmapAction, 1);
4762             act->act = BITMAP_REMOVE;
4763             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4764             break;
4765         case OPTION_CLEAR:
4766             act = g_new0(ImgBitmapAction, 1);
4767             act->act = BITMAP_CLEAR;
4768             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4769             break;
4770         case OPTION_ENABLE:
4771             act = g_new0(ImgBitmapAction, 1);
4772             act->act = BITMAP_ENABLE;
4773             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4774             break;
4775         case OPTION_DISABLE:
4776             act = g_new0(ImgBitmapAction, 1);
4777             act->act = BITMAP_DISABLE;
4778             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4779             break;
4780         case OPTION_MERGE:
4781             act = g_new0(ImgBitmapAction, 1);
4782             act->act = BITMAP_MERGE;
4783             act->src = optarg;
4784             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4785             merge = true;
4786             break;
4787         case OPTION_OBJECT:
4788             user_creatable_process_cmdline(optarg);
4789             break;
4790         case OPTION_IMAGE_OPTS:
4791             image_opts = true;
4792             break;
4793         }
4794     }
4795 
4796     if (QSIMPLEQ_EMPTY(&actions)) {
4797         error_report("Need at least one of --add, --remove, --clear, "
4798                      "--enable, --disable, or --merge");
4799         goto out;
4800     }
4801 
4802     if (granularity && !add) {
4803         error_report("granularity only supported with --add");
4804         goto out;
4805     }
4806     if (src_fmt && !src_filename) {
4807         error_report("-F only supported with -b");
4808         goto out;
4809     }
4810     if (src_filename && !merge) {
4811         error_report("Merge bitmap source file only supported with "
4812                      "--merge");
4813         goto out;
4814     }
4815 
4816     if (optind != argc - 2) {
4817         error_report("Expecting filename and bitmap name");
4818         goto out;
4819     }
4820 
4821     filename = argv[optind];
4822     bitmap = argv[optind + 1];
4823 
4824     /*
4825      * No need to open backing chains; we will be manipulating bitmaps
4826      * directly in this image without reference to image contents.
4827      */
4828     blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR | BDRV_O_NO_BACKING,
4829                    false, false, false);
4830     if (!blk) {
4831         goto out;
4832     }
4833     bs = blk_bs(blk);
4834     if (src_filename) {
4835         src = img_open(false, src_filename, src_fmt, BDRV_O_NO_BACKING,
4836                        false, false, false);
4837         if (!src) {
4838             goto out;
4839         }
4840         src_bs = blk_bs(src);
4841     } else {
4842         src_bs = bs;
4843     }
4844 
4845     QSIMPLEQ_FOREACH_SAFE(act, &actions, next, act_next) {
4846         switch (act->act) {
4847         case BITMAP_ADD:
4848             qmp_block_dirty_bitmap_add(bs->node_name, bitmap,
4849                                        !!granularity, granularity, true, true,
4850                                        false, false, &err);
4851             op = "add";
4852             break;
4853         case BITMAP_REMOVE:
4854             qmp_block_dirty_bitmap_remove(bs->node_name, bitmap, &err);
4855             op = "remove";
4856             break;
4857         case BITMAP_CLEAR:
4858             qmp_block_dirty_bitmap_clear(bs->node_name, bitmap, &err);
4859             op = "clear";
4860             break;
4861         case BITMAP_ENABLE:
4862             qmp_block_dirty_bitmap_enable(bs->node_name, bitmap, &err);
4863             op = "enable";
4864             break;
4865         case BITMAP_DISABLE:
4866             qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err);
4867             op = "disable";
4868             break;
4869         case BITMAP_MERGE:
4870             do_dirty_bitmap_merge(bs->node_name, bitmap, src_bs->node_name,
4871                                   act->src, &err);
4872             op = "merge";
4873             break;
4874         default:
4875             g_assert_not_reached();
4876         }
4877 
4878         if (err) {
4879             error_reportf_err(err, "Operation %s on bitmap %s failed: ",
4880                               op, bitmap);
4881             goto out;
4882         }
4883         g_free(act);
4884     }
4885 
4886     ret = 0;
4887 
4888  out:
4889     /*
4890      * Manually inactivate the images first because this way we can know whether
4891      * an error occurred. blk_unref() doesn't tell us about failures.
4892      */
4893     inactivate_ret = bdrv_inactivate_all();
4894     if (inactivate_ret < 0) {
4895         error_report("Error while closing the image: %s", strerror(-inactivate_ret));
4896         ret = 1;
4897     }
4898 
4899     blk_unref(src);
4900     blk_unref(blk);
4901     qemu_opts_del(opts);
4902     return ret;
4903 }
4904 
/*
 * Bit flags (octal literals, one bit each) stored in DdInfo.flags to record
 * which "qemu-img dd" operands appeared on the command line.
 */
#define C_BS      01    /* bs= */
#define C_COUNT   02    /* count= */
#define C_IF      04    /* if= */
#define C_OF      010   /* of= */
#define C_SKIP    020   /* skip= */
4910 
/* State of a "qemu-img dd" run that is not specific to input or output. */
struct DdInfo {
    unsigned int flags; /* OR of the C_* bits for the operands seen */
    int64_t count;      /* value of count=, in input blocks */
};
4915 
/* Per-side (input or output) parameters of a "qemu-img dd" run. */
struct DdIo {
    int bsz;    /* Block size */
    char *filename; /* g_strdup()ed by if=/of= parsing, freed by img_dd() */
    uint8_t *buf;   /* transfer buffer; img_dd() allocates it for the input */
    int64_t offset; /* value of skip=, in blocks (img_dd() scales by bsz) */
};
4922 
/* Entry of the "name=value" operand table used by img_dd(). */
struct DdOpts {
    const char *name;   /* operand name, i.e. the text before '=' */
    /*
     * Parser for the text after '='.  Receives the input side, the output
     * side and the shared info; returns 0 on success, non-zero on failure.
     */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    unsigned int flag;  /* C_* bit set in DdInfo.flags after a successful parse */
};
4928 
4929 static int img_dd_bs(const char *arg,
4930                      struct DdIo *in, struct DdIo *out,
4931                      struct DdInfo *dd)
4932 {
4933     int64_t res;
4934 
4935     res = cvtnum_full("bs", arg, 1, INT_MAX);
4936 
4937     if (res < 0) {
4938         return 1;
4939     }
4940     in->bsz = out->bsz = res;
4941 
4942     return 0;
4943 }
4944 
4945 static int img_dd_count(const char *arg,
4946                         struct DdIo *in, struct DdIo *out,
4947                         struct DdInfo *dd)
4948 {
4949     dd->count = cvtnum("count", arg);
4950 
4951     if (dd->count < 0) {
4952         return 1;
4953     }
4954 
4955     return 0;
4956 }
4957 
4958 static int img_dd_if(const char *arg,
4959                      struct DdIo *in, struct DdIo *out,
4960                      struct DdInfo *dd)
4961 {
4962     in->filename = g_strdup(arg);
4963 
4964     return 0;
4965 }
4966 
4967 static int img_dd_of(const char *arg,
4968                      struct DdIo *in, struct DdIo *out,
4969                      struct DdInfo *dd)
4970 {
4971     out->filename = g_strdup(arg);
4972 
4973     return 0;
4974 }
4975 
4976 static int img_dd_skip(const char *arg,
4977                        struct DdIo *in, struct DdIo *out,
4978                        struct DdInfo *dd)
4979 {
4980     in->offset = cvtnum("skip", arg);
4981 
4982     if (in->offset < 0) {
4983         return 1;
4984     }
4985 
4986     return 0;
4987 }
4988 
/*
 * Implementation of "qemu-img dd".
 *
 * Options (-f FMT, -O OUTPUT_FMT, -U/--force-share, --image-opts, --object)
 * are parsed with getopt_long(); every remaining argument must be a
 * "name=value" operand from the table below (bs, count, if, of, skip).
 * Both if= and of= are mandatory.
 *
 * The output image is created with the computed size, then data is copied
 * from the input block by block.
 *
 * Returns 0 on success and 1 on any failure.
 */
static int img_dd(int argc, char **argv)
{
    int ret = 0;
    char *arg = NULL;
    char *tmp;
    BlockDriver *drv = NULL, *proto_drv = NULL;
    BlockBackend *blk1 = NULL, *blk2 = NULL;
    QemuOpts *opts = NULL;
    QemuOptsList *create_opts = NULL;
    Error *local_err = NULL;
    bool image_opts = false;
    int c, i;
    const char *out_fmt = "raw";
    const char *fmt = NULL;
    int64_t size = 0;
    int64_t out_pos, in_pos;
    bool force_share = false;
    struct DdInfo dd = {
        .flags = 0,
        .count = 0,
    };
    struct DdIo in = {
        .bsz = 512, /* Block size is by default 512 bytes */
        .filename = NULL,
        .buf = NULL,
        .offset = 0
    };
    struct DdIo out = {
        .bsz = 512,
        .filename = NULL,
        .buf = NULL,
        .offset = 0
    };

    /* Recognized "name=value" operands; terminated by a NULL name. */
    const struct DdOpts options[] = {
        { "bs", img_dd_bs, C_BS },
        { "count", img_dd_count, C_COUNT },
        { "if", img_dd_if, C_IF },
        { "of", img_dd_of, C_OF },
        { "skip", img_dd_skip, C_SKIP },
        { NULL, NULL, 0 }
    };
    const struct option long_options[] = {
        { "help", no_argument, 0, 'h'},
        { "object", required_argument, 0, OPTION_OBJECT},
        { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
        { "force-share", no_argument, 0, 'U'},
        { 0, 0, 0, 0 }
    };

    while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
        if (c == EOF) {
            break;
        }
        switch (c) {
        case 'O':
            out_fmt = optarg;
            break;
        case 'f':
            fmt = optarg;
            break;
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'U':
            force_share = true;
            break;
        case OPTION_OBJECT:
            user_creatable_process_cmdline(optarg);
            break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        }
    }

    /* Everything left on the command line must be a name=value operand. */
    for (i = optind; i < argc; i++) {
        int j;
        /* Work on a copy because the '=' is overwritten to split the pair. */
        arg = g_strdup(argv[i]);

        tmp = strchr(arg, '=');
        if (tmp == NULL) {
            error_report("unrecognized operand %s", arg);
            ret = -1;
            goto out;
        }

        /* arg now holds the name, tmp points at the value. */
        *tmp++ = '\0';

        for (j = 0; options[j].name != NULL; j++) {
            if (!strcmp(arg, options[j].name)) {
                break;
            }
        }
        if (options[j].name == NULL) {
            error_report("unrecognized operand %s", arg);
            ret = -1;
            goto out;
        }

        if (options[j].f(tmp, &in, &out, &dd) != 0) {
            ret = -1;
            goto out;
        }
        dd.flags |= options[j].flag;
        g_free(arg);
        /* Reset so the g_free(arg) at "out" does not free it a second time. */
        arg = NULL;
    }

    if (!(dd.flags & C_IF && dd.flags & C_OF)) {
        error_report("Must specify both input and output files");
        ret = -1;
        goto out;
    }

    blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
                    force_share);

    if (!blk1) {
        ret = -1;
        goto out;
    }

    drv = bdrv_find_format(out_fmt);
    if (!drv) {
        error_report("Unknown file format");
        ret = -1;
        goto out;
    }
    proto_drv = bdrv_find_protocol(out.filename, true, &local_err);

    if (!proto_drv) {
        error_report_err(local_err);
        ret = -1;
        goto out;
    }
    if (!drv->create_opts) {
        error_report("Format driver '%s' does not support image creation",
                     drv->format_name);
        ret = -1;
        goto out;
    }
    if (!proto_drv->create_opts) {
        error_report("Protocol driver '%s' does not support image creation",
                     proto_drv->format_name);
        ret = -1;
        goto out;
    }
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);

    size = blk_getlength(blk1);
    if (size < 0) {
        error_report("Failed to get size for '%s'", in.filename);
        ret = -1;
        goto out;
    }

    /*
     * count= caps "size", the end position of the copy measured from the
     * start of the input; a count whose byte size would overflow is ignored.
     *
     * NOTE(review): because the cap is applied before skip= is taken into
     * account, "count=N skip=M" copies the blocks between M and N rather
     * than N blocks starting at M - confirm this is the intended semantics.
     */
    if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
        dd.count * in.bsz < size) {
        size = dd.count * in.bsz;
    }

    /* Overflow means the specified offset is beyond input image's size */
    if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
                              size < in.bsz * in.offset)) {
        qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
    } else {
        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
                            size - in.bsz * in.offset, &error_abort);
    }

    ret = bdrv_create(drv, out.filename, opts, &local_err);
    if (ret < 0) {
        error_reportf_err(local_err,
                          "%s: error while creating output image: ",
                          out.filename);
        ret = -1;
        goto out;
    }

    /* TODO, we can't honour --image-opts for the target,
     * since it needs to be given in a format compatible
     * with the bdrv_create() call above which does not
     * support image-opts style.
     */
    blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
                         false, false, false);

    if (!blk2) {
        ret = -1;
        goto out;
    }

    if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
                              size < in.offset * in.bsz)) {
        /* We give a warning if the skip option is bigger than the input
         * size and create an empty output disk image (i.e. like dd(1)).
         */
        error_report("%s: cannot skip to specified offset", in.filename);
        /* Starting at "size" makes the copy loop below run zero times. */
        in_pos = size;
    } else {
        in_pos = in.offset * in.bsz;
    }

    in.buf = g_new(uint8_t, in.bsz);

    /* Copy one block per iteration; the final block may be shorter. */
    for (out_pos = 0; in_pos < size; ) {
        int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;

        ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
        if (ret < 0) {
            error_report("error while reading from input image file: %s",
                         strerror(-ret));
            goto out;
        }
        in_pos += bytes;

        ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
        if (ret < 0) {
            error_report("error while writing to output image file: %s",
                         strerror(-ret));
            goto out;
        }
        out_pos += bytes;
    }

out:
    g_free(arg);
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    blk_unref(blk1);
    blk_unref(blk2);
    g_free(in.filename);
    g_free(out.filename);
    g_free(in.buf);
    g_free(out.buf);

    /* Any non-zero internal status is reported to the caller as 1. */
    if (ret) {
        return 1;
    }
    return 0;
}
5240 
5241 static void dump_json_block_measure_info(BlockMeasureInfo *info)
5242 {
5243     GString *str;
5244     QObject *obj;
5245     Visitor *v = qobject_output_visitor_new(&obj);
5246 
5247     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
5248     visit_complete(v, &obj);
5249     str = qobject_to_json_pretty(obj, true);
5250     assert(str != NULL);
5251     printf("%s\n", str->str);
5252     qobject_unref(obj);
5253     visit_free(v);
5254     g_string_free(str, true);
5255 }
5256 
5257 static int img_measure(int argc, char **argv)
5258 {
5259     static const struct option long_options[] = {
5260         {"help", no_argument, 0, 'h'},
5261         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5262         {"object", required_argument, 0, OPTION_OBJECT},
5263         {"output", required_argument, 0, OPTION_OUTPUT},
5264         {"size", required_argument, 0, OPTION_SIZE},
5265         {"force-share", no_argument, 0, 'U'},
5266         {0, 0, 0, 0}
5267     };
5268     OutputFormat output_format = OFORMAT_HUMAN;
5269     BlockBackend *in_blk = NULL;
5270     BlockDriver *drv;
5271     const char *filename = NULL;
5272     const char *fmt = NULL;
5273     const char *out_fmt = "raw";
5274     char *options = NULL;
5275     char *snapshot_name = NULL;
5276     bool force_share = false;
5277     QemuOpts *opts = NULL;
5278     QemuOpts *object_opts = NULL;
5279     QemuOpts *sn_opts = NULL;
5280     QemuOptsList *create_opts = NULL;
5281     bool image_opts = false;
5282     uint64_t img_size = UINT64_MAX;
5283     BlockMeasureInfo *info = NULL;
5284     Error *local_err = NULL;
5285     int ret = 1;
5286     int c;
5287 
5288     while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
5289                             long_options, NULL)) != -1) {
5290         switch (c) {
5291         case '?':
5292         case 'h':
5293             help();
5294             break;
5295         case 'f':
5296             fmt = optarg;
5297             break;
5298         case 'O':
5299             out_fmt = optarg;
5300             break;
5301         case 'o':
5302             if (accumulate_options(&options, optarg) < 0) {
5303                 goto out;
5304             }
5305             break;
5306         case 'l':
5307             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
5308                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
5309                                                   optarg, false);
5310                 if (!sn_opts) {
5311                     error_report("Failed in parsing snapshot param '%s'",
5312                                  optarg);
5313                     goto out;
5314                 }
5315             } else {
5316                 snapshot_name = optarg;
5317             }
5318             break;
5319         case 'U':
5320             force_share = true;
5321             break;
5322         case OPTION_OBJECT:
5323             user_creatable_process_cmdline(optarg);
5324             break;
5325         case OPTION_IMAGE_OPTS:
5326             image_opts = true;
5327             break;
5328         case OPTION_OUTPUT:
5329             if (!strcmp(optarg, "json")) {
5330                 output_format = OFORMAT_JSON;
5331             } else if (!strcmp(optarg, "human")) {
5332                 output_format = OFORMAT_HUMAN;
5333             } else {
5334                 error_report("--output must be used with human or json "
5335                              "as argument.");
5336                 goto out;
5337             }
5338             break;
5339         case OPTION_SIZE:
5340         {
5341             int64_t sval;
5342 
5343             sval = cvtnum("image size", optarg);
5344             if (sval < 0) {
5345                 goto out;
5346             }
5347             img_size = (uint64_t)sval;
5348         }
5349         break;
5350         }
5351     }
5352 
5353     if (argc - optind > 1) {
5354         error_report("At most one filename argument is allowed.");
5355         goto out;
5356     } else if (argc - optind == 1) {
5357         filename = argv[optind];
5358     }
5359 
5360     if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
5361         error_report("--image-opts, -f, and -l require a filename argument.");
5362         goto out;
5363     }
5364     if (filename && img_size != UINT64_MAX) {
5365         error_report("--size N cannot be used together with a filename.");
5366         goto out;
5367     }
5368     if (!filename && img_size == UINT64_MAX) {
5369         error_report("Either --size N or one filename must be specified.");
5370         goto out;
5371     }
5372 
5373     if (filename) {
5374         in_blk = img_open(image_opts, filename, fmt, 0,
5375                           false, false, force_share);
5376         if (!in_blk) {
5377             goto out;
5378         }
5379 
5380         if (sn_opts) {
5381             bdrv_snapshot_load_tmp(blk_bs(in_blk),
5382                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
5383                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
5384                     &local_err);
5385         } else if (snapshot_name != NULL) {
5386             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
5387                     snapshot_name, &local_err);
5388         }
5389         if (local_err) {
5390             error_reportf_err(local_err, "Failed to load snapshot: ");
5391             goto out;
5392         }
5393     }
5394 
5395     drv = bdrv_find_format(out_fmt);
5396     if (!drv) {
5397         error_report("Unknown file format '%s'", out_fmt);
5398         goto out;
5399     }
5400     if (!drv->create_opts) {
5401         error_report("Format driver '%s' does not support image creation",
5402                      drv->format_name);
5403         goto out;
5404     }
5405 
5406     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5407     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
5408     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5409     if (options) {
5410         if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
5411             error_report_err(local_err);
5412             error_report("Invalid options for file format '%s'", out_fmt);
5413             goto out;
5414         }
5415     }
5416     if (img_size != UINT64_MAX) {
5417         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5418     }
5419 
5420     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
5421     if (local_err) {
5422         error_report_err(local_err);
5423         goto out;
5424     }
5425 
5426     if (output_format == OFORMAT_HUMAN) {
5427         printf("required size: %" PRIu64 "\n", info->required);
5428         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5429         if (info->has_bitmaps) {
5430             printf("bitmaps size: %" PRIu64 "\n", info->bitmaps);
5431         }
5432     } else {
5433         dump_json_block_measure_info(info);
5434     }
5435 
5436     ret = 0;
5437 
5438 out:
5439     qapi_free_BlockMeasureInfo(info);
5440     qemu_opts_del(object_opts);
5441     qemu_opts_del(opts);
5442     qemu_opts_del(sn_opts);
5443     qemu_opts_free(create_opts);
5444     g_free(options);
5445     blk_unref(in_blk);
5446     return ret;
5447 }
5448 
/*
 * Table of qemu-img subcommands.  Each DEF() line in qemu-img-cmds.h expands
 * to one { name, handler } entry (the arg_string parameter is dropped here).
 * The table ends with a NULL-name sentinel, which main() relies on when
 * searching for the requested command.
 */
static const img_cmd_t img_cmds[] = {
#define DEF(option, callback, arg_string)        \
    { option, callback },
#include "qemu-img-cmds.h"
#undef DEF
    { NULL, NULL, },
};
5456 
5457 int main(int argc, char **argv)
5458 {
5459     const img_cmd_t *cmd;
5460     const char *cmdname;
5461     int c;
5462     static const struct option long_options[] = {
5463         {"help", no_argument, 0, 'h'},
5464         {"version", no_argument, 0, 'V'},
5465         {"trace", required_argument, NULL, 'T'},
5466         {0, 0, 0, 0}
5467     };
5468 
5469 #ifdef CONFIG_POSIX
5470     signal(SIGPIPE, SIG_IGN);
5471 #endif
5472 
5473     socket_init();
5474     error_init(argv[0]);
5475     module_call_init(MODULE_INIT_TRACE);
5476     qemu_init_exec_dir(argv[0]);
5477 
5478     qemu_init_main_loop(&error_fatal);
5479 
5480     qcrypto_init(&error_fatal);
5481 
5482     module_call_init(MODULE_INIT_QOM);
5483     bdrv_init();
5484     if (argc < 2) {
5485         error_exit("Not enough arguments");
5486     }
5487 
5488     qemu_add_opts(&qemu_source_opts);
5489     qemu_add_opts(&qemu_trace_opts);
5490 
5491     while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5492         switch (c) {
5493         case ':':
5494             missing_argument(argv[optind - 1]);
5495             return 0;
5496         case '?':
5497             unrecognized_option(argv[optind - 1]);
5498             return 0;
5499         case 'h':
5500             help();
5501             return 0;
5502         case 'V':
5503             printf(QEMU_IMG_VERSION);
5504             return 0;
5505         case 'T':
5506             trace_opt_parse(optarg);
5507             break;
5508         }
5509     }
5510 
5511     cmdname = argv[optind];
5512 
5513     /* reset getopt_long scanning */
5514     argc -= optind;
5515     if (argc < 1) {
5516         return 0;
5517     }
5518     argv += optind;
5519     qemu_reset_optind();
5520 
5521     if (!trace_init_backends()) {
5522         exit(1);
5523     }
5524     trace_init_file();
5525     qemu_set_log(LOG_TRACE, &error_fatal);
5526 
5527     /* find the command */
5528     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5529         if (!strcmp(cmdname, cmd->name)) {
5530             return cmd->handler(argc, argv);
5531         }
5532     }
5533 
5534     /* not found */
5535     error_exit("Command not found: %s", cmdname);
5536 }
5537