xref: /openbmc/qemu/qemu-img.c (revision 0b84b662)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu-common.h"
29 #include "qemu-version.h"
30 #include "qapi/error.h"
31 #include "qapi/qapi-visit-block-core.h"
32 #include "qapi/qobject-output-visitor.h"
33 #include "qapi/qmp/qjson.h"
34 #include "qapi/qmp/qdict.h"
35 #include "qapi/qmp/qstring.h"
36 #include "qemu/cutils.h"
37 #include "qemu/config-file.h"
38 #include "qemu/option.h"
39 #include "qemu/error-report.h"
40 #include "qemu/log.h"
41 #include "qemu/main-loop.h"
42 #include "qemu/module.h"
43 #include "qemu/units.h"
44 #include "qom/object_interfaces.h"
45 #include "sysemu/block-backend.h"
46 #include "block/block_int.h"
47 #include "block/blockjob.h"
48 #include "block/qapi.h"
49 #include "crypto/init.h"
50 #include "trace/control.h"
51 
/*
 * Version banner: "qemu-img version <full version>" on the first line,
 * followed by the copyright notice on the next line.
 */
#define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
                          "\n" QEMU_COPYRIGHT "\n"
54 
/*
 * One qemu-img subcommand: its name and the function implementing it.
 * The handler takes an argc/argv pair, matching the img_*() functions
 * in this file, and returns an int (presumably the exit status -- confirm
 * against the dispatch code).
 */
typedef struct img_cmd_t {
    const char *name;
    int (*handler)(int argc, char **argv);
} img_cmd_t;
59 
/*
 * Codes returned by getopt_long() for options that only have a long form.
 * They start at 256, presumably so they cannot collide with the
 * single-character short option codes.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN = 257,
    OPTION_OBJECT = 258,
    OPTION_IMAGE_OPTS = 259,
    OPTION_PATTERN = 260,
    OPTION_FLUSH_INTERVAL = 261,
    OPTION_NO_DRAIN = 262,
    OPTION_TARGET_IMAGE_OPTS = 263,
    OPTION_SIZE = 264,
    OPTION_PREALLOCATION = 265,
    OPTION_SHRINK = 266,
    OPTION_SALVAGE = 267,
};
74 
/* Report style selected with --output: machine-readable JSON or human text. */
typedef enum OutputFormat {
    OFORMAT_JSON,
    OFORMAT_HUMAN,
} OutputFormat;
79 
/*
 * Default cache mode for commands that accept a cache option: use
 * cache=writeback, as data integrity is not important for qemu-img.
 */
#define BDRV_DEFAULT_CACHE "writeback"
82 
/*
 * Callback handed to bdrv_iterate_format() by help(): prints one format
 * name preceded by a single space.  @opaque is not used.
 */
static void format_print(void *opaque, const char *name)
{
    (void)opaque;

    printf(" %s", name);
}
87 
88 static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
89 {
90     va_list ap;
91 
92     va_start(ap, fmt);
93     error_vreport(fmt, ap);
94     va_end(ap);
95 
96     error_printf("Try 'qemu-img --help' for more information\n");
97     exit(EXIT_FAILURE);
98 }
99 
100 static void QEMU_NORETURN missing_argument(const char *option)
101 {
102     error_exit("missing argument for option '%s'", option);
103 }
104 
105 static void QEMU_NORETURN unrecognized_option(const char *option)
106 {
107     error_exit("unrecognized option '%s'", option);
108 }
109 
110 /* Please keep in synch with qemu-img.texi */
111 static void QEMU_NORETURN help(void)
112 {
113     const char *help_msg =
114            QEMU_IMG_VERSION
115            "usage: qemu-img [standard options] command [command options]\n"
116            "QEMU disk image utility\n"
117            "\n"
118            "    '-h', '--help'       display this help and exit\n"
119            "    '-V', '--version'    output version information and exit\n"
120            "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
121            "                         specify tracing options\n"
122            "\n"
123            "Command syntax:\n"
124 #define DEF(option, callback, arg_string)        \
125            "  " arg_string "\n"
126 #include "qemu-img-cmds.h"
127 #undef DEF
128            "\n"
129            "Command parameters:\n"
130            "  'filename' is a disk image filename\n"
131            "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
132            "    manual page for a description of the object properties. The most common\n"
133            "    object type is a 'secret', which is used to supply passwords and/or\n"
134            "    encryption keys.\n"
135            "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
136            "  'cache' is the cache mode used to write the output disk image, the valid\n"
137            "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
138            "    'directsync' and 'unsafe' (default for convert)\n"
139            "  'src_cache' is the cache mode used to read input disk images, the valid\n"
140            "    options are the same as for the 'cache' option\n"
141            "  'size' is the disk image size in bytes. Optional suffixes\n"
142            "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
143            "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
144            "    supported. 'b' is ignored.\n"
145            "  'output_filename' is the destination disk image filename\n"
146            "  'output_fmt' is the destination format\n"
147            "  'options' is a comma separated list of format specific options in a\n"
148            "    name=value format. Use -o ? for an overview of the options supported by the\n"
149            "    used format\n"
150            "  'snapshot_param' is param used for internal snapshot, format\n"
151            "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
152            "    '[ID_OR_NAME]'\n"
153            "  '-c' indicates that target image must be compressed (qcow format only)\n"
154            "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
155            "       new backing file match exactly. The image doesn't need a working\n"
156            "       backing file before rebasing in this case (useful for renaming the\n"
157            "       backing file). For image creation, allow creating without attempting\n"
158            "       to open the backing file.\n"
159            "  '-h' with or without a command shows this help and lists the supported formats\n"
160            "  '-p' show progress of command (only certain commands)\n"
161            "  '-q' use Quiet mode - do not print any output (except errors)\n"
162            "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
163            "       contain only zeros for qemu-img to create a sparse image during\n"
164            "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
165            "       unallocated or zero sectors, and the destination image will always be\n"
166            "       fully allocated\n"
167            "  '--output' takes the format in which the output must be done (human or json)\n"
168            "  '-n' skips the target volume creation (useful if the volume is created\n"
169            "       prior to running qemu-img)\n"
170            "\n"
171            "Parameters to check subcommand:\n"
172            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
173            "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
174            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
175            "       hiding corruption that has already occurred.\n"
176            "\n"
177            "Parameters to convert subcommand:\n"
178            "  '-m' specifies how many coroutines work in parallel during the convert\n"
179            "       process (defaults to 8)\n"
180            "  '-W' allow to write to the target out of order rather than sequential\n"
181            "\n"
182            "Parameters to snapshot subcommand:\n"
183            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
184            "  '-a' applies a snapshot (revert disk to saved state)\n"
185            "  '-c' creates a snapshot\n"
186            "  '-d' deletes a snapshot\n"
187            "  '-l' lists all snapshots in the given image\n"
188            "\n"
189            "Parameters to compare subcommand:\n"
190            "  '-f' first image format\n"
191            "  '-F' second image format\n"
192            "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
193            "\n"
194            "Parameters to dd subcommand:\n"
195            "  'bs=BYTES' read and write up to BYTES bytes at a time "
196            "(default: 512)\n"
197            "  'count=N' copy only N input blocks\n"
198            "  'if=FILE' read from FILE\n"
199            "  'of=FILE' write to FILE\n"
200            "  'skip=N' skip N bs-sized blocks at the start of input\n";
201 
202     printf("%s\nSupported formats:", help_msg);
203     bdrv_iterate_format(format_print, NULL, false);
204     printf("\n\n" QEMU_HELP_BOTTOM "\n");
205     exit(EXIT_SUCCESS);
206 }
207 
/*
 * Option list used to parse --object arguments (see the OPTION_OBJECT
 * cases below).  A leading value without a key is taken as "qom-type".
 * The descriptor table is empty.
 */
static QemuOptsList qemu_object_opts = {
    .name = "object",
    .implied_opt_name = "qom-type",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
    .desc = {
        { }
    },
};
216 
/*
 * Option list named "source"; img_open() looks up a list of that name via
 * qemu_find_opts("source") to parse --image-opts strings (registration of
 * this list is not visible here -- confirm).  A leading value without a
 * key is taken as "file".  The descriptor table is empty.
 */
static QemuOptsList qemu_source_opts = {
    .name = "source",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
    .desc = {
        { }
    },
};
225 
226 static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
227 {
228     int ret = 0;
229     if (!quiet) {
230         va_list args;
231         va_start(args, fmt);
232         ret = vprintf(fmt, args);
233         va_end(args);
234     }
235     return ret;
236 }
237 
238 
239 static int print_block_option_help(const char *filename, const char *fmt)
240 {
241     BlockDriver *drv, *proto_drv;
242     QemuOptsList *create_opts = NULL;
243     Error *local_err = NULL;
244 
245     /* Find driver and parse its options */
246     drv = bdrv_find_format(fmt);
247     if (!drv) {
248         error_report("Unknown file format '%s'", fmt);
249         return 1;
250     }
251 
252     if (!drv->create_opts) {
253         error_report("Format driver '%s' does not support image creation", fmt);
254         return 1;
255     }
256 
257     create_opts = qemu_opts_append(create_opts, drv->create_opts);
258     if (filename) {
259         proto_drv = bdrv_find_protocol(filename, true, &local_err);
260         if (!proto_drv) {
261             error_report_err(local_err);
262             qemu_opts_free(create_opts);
263             return 1;
264         }
265         if (!proto_drv->create_opts) {
266             error_report("Protocol driver '%s' does not support image creation",
267                          proto_drv->format_name);
268             qemu_opts_free(create_opts);
269             return 1;
270         }
271         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
272     }
273 
274     if (filename) {
275         printf("Supported options:\n");
276     } else {
277         printf("Supported %s options:\n", fmt);
278     }
279     qemu_opts_print_help(create_opts, false);
280     qemu_opts_free(create_opts);
281 
282     if (!filename) {
283         printf("\n"
284                "The protocol level may support further options.\n"
285                "Specify the target filename to include those options.\n");
286     }
287 
288     return 0;
289 }
290 
291 
292 static BlockBackend *img_open_opts(const char *optstr,
293                                    QemuOpts *opts, int flags, bool writethrough,
294                                    bool quiet, bool force_share)
295 {
296     QDict *options;
297     Error *local_err = NULL;
298     BlockBackend *blk;
299     options = qemu_opts_to_qdict(opts, NULL);
300     if (force_share) {
301         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
302             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
303             error_report("--force-share/-U conflicts with image options");
304             qobject_unref(options);
305             return NULL;
306         }
307         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
308     }
309     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
310     if (!blk) {
311         error_reportf_err(local_err, "Could not open '%s': ", optstr);
312         return NULL;
313     }
314     blk_set_enable_write_cache(blk, !writethrough);
315 
316     return blk;
317 }
318 
319 static BlockBackend *img_open_file(const char *filename,
320                                    QDict *options,
321                                    const char *fmt, int flags,
322                                    bool writethrough, bool quiet,
323                                    bool force_share)
324 {
325     BlockBackend *blk;
326     Error *local_err = NULL;
327 
328     if (!options) {
329         options = qdict_new();
330     }
331     if (fmt) {
332         qdict_put_str(options, "driver", fmt);
333     }
334 
335     if (force_share) {
336         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
337     }
338     blk = blk_new_open(filename, NULL, options, flags, &local_err);
339     if (!blk) {
340         error_reportf_err(local_err, "Could not open '%s': ", filename);
341         return NULL;
342     }
343     blk_set_enable_write_cache(blk, !writethrough);
344 
345     return blk;
346 }
347 
348 
349 static int img_add_key_secrets(void *opaque,
350                                const char *name, const char *value,
351                                Error **errp)
352 {
353     QDict *options = opaque;
354 
355     if (g_str_has_suffix(name, "key-secret")) {
356         qdict_put_str(options, name, value);
357     }
358 
359     return 0;
360 }
361 
362 
363 static BlockBackend *img_open(bool image_opts,
364                               const char *filename,
365                               const char *fmt, int flags, bool writethrough,
366                               bool quiet, bool force_share)
367 {
368     BlockBackend *blk;
369     if (image_opts) {
370         QemuOpts *opts;
371         if (fmt) {
372             error_report("--image-opts and --format are mutually exclusive");
373             return NULL;
374         }
375         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
376                                        filename, true);
377         if (!opts) {
378             return NULL;
379         }
380         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
381                             force_share);
382     } else {
383         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
384                             force_share);
385     }
386     return blk;
387 }
388 
389 
390 static int add_old_style_options(const char *fmt, QemuOpts *opts,
391                                  const char *base_filename,
392                                  const char *base_fmt)
393 {
394     Error *err = NULL;
395 
396     if (base_filename) {
397         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
398         if (err) {
399             error_report("Backing file not supported for file format '%s'",
400                          fmt);
401             error_free(err);
402             return -1;
403         }
404     }
405     if (base_fmt) {
406         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
407         if (err) {
408             error_report("Backing file format not supported for file "
409                          "format '%s'", fmt);
410             error_free(err);
411             return -1;
412         }
413     }
414     return 0;
415 }
416 
/*
 * Parse a size string with qemu_strtosz().
 *
 * Returns the parsed value, the negative error code from qemu_strtosz()
 * if parsing failed, or -ERANGE if the value does not fit in an int64_t.
 */
static int64_t cvtnum(const char *s)
{
    uint64_t parsed;
    int rc = qemu_strtosz(s, NULL, &parsed);

    if (rc < 0) {
        return rc;
    }
    if (parsed > INT64_MAX) {
        return -ERANGE;
    }

    return parsed;
}
431 
432 static int img_create(int argc, char **argv)
433 {
434     int c;
435     uint64_t img_size = -1;
436     const char *fmt = "raw";
437     const char *base_fmt = NULL;
438     const char *filename;
439     const char *base_filename = NULL;
440     char *options = NULL;
441     Error *local_err = NULL;
442     bool quiet = false;
443     int flags = 0;
444 
445     for(;;) {
446         static const struct option long_options[] = {
447             {"help", no_argument, 0, 'h'},
448             {"object", required_argument, 0, OPTION_OBJECT},
449             {0, 0, 0, 0}
450         };
451         c = getopt_long(argc, argv, ":F:b:f:ho:qu",
452                         long_options, NULL);
453         if (c == -1) {
454             break;
455         }
456         switch(c) {
457         case ':':
458             missing_argument(argv[optind - 1]);
459             break;
460         case '?':
461             unrecognized_option(argv[optind - 1]);
462             break;
463         case 'h':
464             help();
465             break;
466         case 'F':
467             base_fmt = optarg;
468             break;
469         case 'b':
470             base_filename = optarg;
471             break;
472         case 'f':
473             fmt = optarg;
474             break;
475         case 'o':
476             if (!is_valid_option_list(optarg)) {
477                 error_report("Invalid option list: %s", optarg);
478                 goto fail;
479             }
480             if (!options) {
481                 options = g_strdup(optarg);
482             } else {
483                 char *old_options = options;
484                 options = g_strdup_printf("%s,%s", options, optarg);
485                 g_free(old_options);
486             }
487             break;
488         case 'q':
489             quiet = true;
490             break;
491         case 'u':
492             flags |= BDRV_O_NO_BACKING;
493             break;
494         case OPTION_OBJECT: {
495             QemuOpts *opts;
496             opts = qemu_opts_parse_noisily(&qemu_object_opts,
497                                            optarg, true);
498             if (!opts) {
499                 goto fail;
500             }
501         }   break;
502         }
503     }
504 
505     /* Get the filename */
506     filename = (optind < argc) ? argv[optind] : NULL;
507     if (options && has_help_option(options)) {
508         g_free(options);
509         return print_block_option_help(filename, fmt);
510     }
511 
512     if (optind >= argc) {
513         error_exit("Expecting image file name");
514     }
515     optind++;
516 
517     if (qemu_opts_foreach(&qemu_object_opts,
518                           user_creatable_add_opts_foreach,
519                           NULL, &error_fatal)) {
520         goto fail;
521     }
522 
523     /* Get image size, if specified */
524     if (optind < argc) {
525         int64_t sval;
526 
527         sval = cvtnum(argv[optind++]);
528         if (sval < 0) {
529             if (sval == -ERANGE) {
530                 error_report("Image size must be less than 8 EiB!");
531             } else {
532                 error_report("Invalid image size specified! You may use k, M, "
533                       "G, T, P or E suffixes for ");
534                 error_report("kilobytes, megabytes, gigabytes, terabytes, "
535                              "petabytes and exabytes.");
536             }
537             goto fail;
538         }
539         img_size = (uint64_t)sval;
540     }
541     if (optind != argc) {
542         error_exit("Unexpected argument: %s", argv[optind]);
543     }
544 
545     bdrv_img_create(filename, fmt, base_filename, base_fmt,
546                     options, img_size, flags, quiet, &local_err);
547     if (local_err) {
548         error_reportf_err(local_err, "%s: ", filename);
549         goto fail;
550     }
551 
552     g_free(options);
553     return 0;
554 
555 fail:
556     g_free(options);
557     return 1;
558 }
559 
560 static void dump_json_image_check(ImageCheck *check, bool quiet)
561 {
562     QString *str;
563     QObject *obj;
564     Visitor *v = qobject_output_visitor_new(&obj);
565 
566     visit_type_ImageCheck(v, NULL, &check, &error_abort);
567     visit_complete(v, &obj);
568     str = qobject_to_json_pretty(obj);
569     assert(str != NULL);
570     qprintf(quiet, "%s\n", qstring_get_str(str));
571     qobject_unref(obj);
572     visit_free(v);
573     qobject_unref(str);
574 }
575 
576 static void dump_human_image_check(ImageCheck *check, bool quiet)
577 {
578     if (!(check->corruptions || check->leaks || check->check_errors)) {
579         qprintf(quiet, "No errors were found on the image.\n");
580     } else {
581         if (check->corruptions) {
582             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
583                     "Data may be corrupted, or further writes to the image "
584                     "may corrupt it.\n",
585                     check->corruptions);
586         }
587 
588         if (check->leaks) {
589             qprintf(quiet,
590                     "\n%" PRId64 " leaked clusters were found on the image.\n"
591                     "This means waste of disk space, but no harm to data.\n",
592                     check->leaks);
593         }
594 
595         if (check->check_errors) {
596             qprintf(quiet,
597                     "\n%" PRId64
598                     " internal errors have occurred during the check.\n",
599                     check->check_errors);
600         }
601     }
602 
603     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
604         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
605                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
606                 check->allocated_clusters, check->total_clusters,
607                 check->allocated_clusters * 100.0 / check->total_clusters,
608                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
609                 check->compressed_clusters * 100.0 /
610                 check->allocated_clusters);
611     }
612 
613     if (check->image_end_offset) {
614         qprintf(quiet,
615                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
616     }
617 }
618 
619 static int collect_image_check(BlockDriverState *bs,
620                    ImageCheck *check,
621                    const char *filename,
622                    const char *fmt,
623                    int fix)
624 {
625     int ret;
626     BdrvCheckResult result;
627 
628     ret = bdrv_check(bs, &result, fix);
629     if (ret < 0) {
630         return ret;
631     }
632 
633     check->filename                 = g_strdup(filename);
634     check->format                   = g_strdup(bdrv_get_format_name(bs));
635     check->check_errors             = result.check_errors;
636     check->corruptions              = result.corruptions;
637     check->has_corruptions          = result.corruptions != 0;
638     check->leaks                    = result.leaks;
639     check->has_leaks                = result.leaks != 0;
640     check->corruptions_fixed        = result.corruptions_fixed;
641     check->has_corruptions_fixed    = result.corruptions != 0;
642     check->leaks_fixed              = result.leaks_fixed;
643     check->has_leaks_fixed          = result.leaks != 0;
644     check->image_end_offset         = result.image_end_offset;
645     check->has_image_end_offset     = result.image_end_offset != 0;
646     check->total_clusters           = result.bfi.total_clusters;
647     check->has_total_clusters       = result.bfi.total_clusters != 0;
648     check->allocated_clusters       = result.bfi.allocated_clusters;
649     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
650     check->fragmented_clusters      = result.bfi.fragmented_clusters;
651     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
652     check->compressed_clusters      = result.bfi.compressed_clusters;
653     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
654 
655     return 0;
656 }
657 
658 /*
659  * Checks an image for consistency. Exit codes:
660  *
661  *  0 - Check completed, image is good
662  *  1 - Check not completed because of internal errors
663  *  2 - Check completed, image is corrupted
664  *  3 - Check completed, image has leaked clusters, but is good otherwise
665  * 63 - Checks are not supported by the image format
666  */
667 static int img_check(int argc, char **argv)
668 {
669     int c, ret;
670     OutputFormat output_format = OFORMAT_HUMAN;
671     const char *filename, *fmt, *output, *cache;
672     BlockBackend *blk;
673     BlockDriverState *bs;
674     int fix = 0;
675     int flags = BDRV_O_CHECK;
676     bool writethrough;
677     ImageCheck *check;
678     bool quiet = false;
679     bool image_opts = false;
680     bool force_share = false;
681 
682     fmt = NULL;
683     output = NULL;
684     cache = BDRV_DEFAULT_CACHE;
685 
686     for(;;) {
687         int option_index = 0;
688         static const struct option long_options[] = {
689             {"help", no_argument, 0, 'h'},
690             {"format", required_argument, 0, 'f'},
691             {"repair", required_argument, 0, 'r'},
692             {"output", required_argument, 0, OPTION_OUTPUT},
693             {"object", required_argument, 0, OPTION_OBJECT},
694             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
695             {"force-share", no_argument, 0, 'U'},
696             {0, 0, 0, 0}
697         };
698         c = getopt_long(argc, argv, ":hf:r:T:qU",
699                         long_options, &option_index);
700         if (c == -1) {
701             break;
702         }
703         switch(c) {
704         case ':':
705             missing_argument(argv[optind - 1]);
706             break;
707         case '?':
708             unrecognized_option(argv[optind - 1]);
709             break;
710         case 'h':
711             help();
712             break;
713         case 'f':
714             fmt = optarg;
715             break;
716         case 'r':
717             flags |= BDRV_O_RDWR;
718 
719             if (!strcmp(optarg, "leaks")) {
720                 fix = BDRV_FIX_LEAKS;
721             } else if (!strcmp(optarg, "all")) {
722                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
723             } else {
724                 error_exit("Unknown option value for -r "
725                            "(expecting 'leaks' or 'all'): %s", optarg);
726             }
727             break;
728         case OPTION_OUTPUT:
729             output = optarg;
730             break;
731         case 'T':
732             cache = optarg;
733             break;
734         case 'q':
735             quiet = true;
736             break;
737         case 'U':
738             force_share = true;
739             break;
740         case OPTION_OBJECT: {
741             QemuOpts *opts;
742             opts = qemu_opts_parse_noisily(&qemu_object_opts,
743                                            optarg, true);
744             if (!opts) {
745                 return 1;
746             }
747         }   break;
748         case OPTION_IMAGE_OPTS:
749             image_opts = true;
750             break;
751         }
752     }
753     if (optind != argc - 1) {
754         error_exit("Expecting one image file name");
755     }
756     filename = argv[optind++];
757 
758     if (output && !strcmp(output, "json")) {
759         output_format = OFORMAT_JSON;
760     } else if (output && !strcmp(output, "human")) {
761         output_format = OFORMAT_HUMAN;
762     } else if (output) {
763         error_report("--output must be used with human or json as argument.");
764         return 1;
765     }
766 
767     if (qemu_opts_foreach(&qemu_object_opts,
768                           user_creatable_add_opts_foreach,
769                           NULL, &error_fatal)) {
770         return 1;
771     }
772 
773     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
774     if (ret < 0) {
775         error_report("Invalid source cache option: %s", cache);
776         return 1;
777     }
778 
779     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
780                    force_share);
781     if (!blk) {
782         return 1;
783     }
784     bs = blk_bs(blk);
785 
786     check = g_new0(ImageCheck, 1);
787     ret = collect_image_check(bs, check, filename, fmt, fix);
788 
789     if (ret == -ENOTSUP) {
790         error_report("This image format does not support checks");
791         ret = 63;
792         goto fail;
793     }
794 
795     if (check->corruptions_fixed || check->leaks_fixed) {
796         int corruptions_fixed, leaks_fixed;
797 
798         leaks_fixed         = check->leaks_fixed;
799         corruptions_fixed   = check->corruptions_fixed;
800 
801         if (output_format == OFORMAT_HUMAN) {
802             qprintf(quiet,
803                     "The following inconsistencies were found and repaired:\n\n"
804                     "    %" PRId64 " leaked clusters\n"
805                     "    %" PRId64 " corruptions\n\n"
806                     "Double checking the fixed image now...\n",
807                     check->leaks_fixed,
808                     check->corruptions_fixed);
809         }
810 
811         ret = collect_image_check(bs, check, filename, fmt, 0);
812 
813         check->leaks_fixed          = leaks_fixed;
814         check->corruptions_fixed    = corruptions_fixed;
815     }
816 
817     if (!ret) {
818         switch (output_format) {
819         case OFORMAT_HUMAN:
820             dump_human_image_check(check, quiet);
821             break;
822         case OFORMAT_JSON:
823             dump_json_image_check(check, quiet);
824             break;
825         }
826     }
827 
828     if (ret || check->check_errors) {
829         if (ret) {
830             error_report("Check failed: %s", strerror(-ret));
831         } else {
832             error_report("Check failed");
833         }
834         ret = 1;
835         goto fail;
836     }
837 
838     if (check->corruptions) {
839         ret = 2;
840     } else if (check->leaks) {
841         ret = 3;
842     } else {
843         ret = 0;
844     }
845 
846 fail:
847     qapi_free_ImageCheck(check);
848     blk_unref(blk);
849     return ret;
850 }
851 
/*
 * Context passed as the opaque pointer to common_block_job_cb().
 * @errp is where the callback stores an error when the job fails;
 * @bs is not read by the callback itself.
 */
typedef struct CommonBlockJobCBInfo {
    BlockDriverState *bs;
    Error **errp;
} CommonBlockJobCBInfo;
856 
857 static void common_block_job_cb(void *opaque, int ret)
858 {
859     CommonBlockJobCBInfo *cbi = opaque;
860 
861     if (ret < 0) {
862         error_setg_errno(cbi->errp, -ret, "Block job failed");
863     }
864 }
865 
866 static void run_block_job(BlockJob *job, Error **errp)
867 {
868     AioContext *aio_context = blk_get_aio_context(job->blk);
869     int ret = 0;
870 
871     aio_context_acquire(aio_context);
872     job_ref(&job->job);
873     do {
874         float progress = 0.0f;
875         aio_poll(aio_context, true);
876         if (job->job.progress_total) {
877             progress = (float)job->job.progress_current /
878                        job->job.progress_total * 100.f;
879         }
880         qemu_progress_print(progress, 0);
881     } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
882 
883     if (!job_is_completed(&job->job)) {
884         ret = job_complete_sync(&job->job, errp);
885     } else {
886         ret = job->job.ret;
887     }
888     job_unref(&job->job);
889     aio_context_release(aio_context);
890 
891     /* publish completion progress only when success */
892     if (!ret) {
893         qemu_progress_print(100.f, 0);
894     }
895 }
896 
/*
 * Implementation of the 'commit' subcommand.
 *
 * Opens the image named on the command line read-write, starts an active
 * commit job with the ID "commit" from the image into either the backing
 * image selected with -b or the immediate backing file, and runs that job
 * to completion.  Unless -d (or -b, which implies it) was given, the top
 * image is emptied afterwards if its driver provides bdrv_make_empty.
 *
 * Options handled here: -f (format), -t (cache mode), -b (base), -d (drop),
 * -p (progress), -q (quiet), --object, --image-opts, -h/--help.
 *
 * Returns 0 on success and 1 on any failure detected in this function.
 */
static int img_commit(int argc, char **argv)
{
    int c, ret, flags;
    const char *filename, *fmt, *cache, *base;
    BlockBackend *blk;
    BlockDriverState *bs, *base_bs;
    BlockJob *job;
    bool progress = false, quiet = false, drop = false;
    bool writethrough;
    Error *local_err = NULL;
    CommonBlockJobCBInfo cbi;
    bool image_opts = false;
    AioContext *aio_context;

    fmt = NULL;
    cache = BDRV_DEFAULT_CACHE;
    base = NULL;
    for(;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {0, 0, 0, 0}
        };
        /* Leading ':' makes getopt return ':' for a missing option argument */
        c = getopt_long(argc, argv, ":f:ht:b:dpq",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch(c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'f':
            fmt = optarg;
            break;
        case 't':
            cache = optarg;
            break;
        case 'b':
            base = optarg;
            /* -b implies -d */
            drop = true;
            break;
        case 'd':
            drop = true;
            break;
        case 'p':
            progress = true;
            break;
        case 'q':
            quiet = true;
            break;
        case OPTION_OBJECT: {
            QemuOpts *opts;
            opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                           optarg, true);
            if (!opts) {
                return 1;
            }
        }   break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        }
    }

    /* Progress is not shown in Quiet mode */
    if (quiet) {
        progress = false;
    }

    /* Exactly one positional argument (the image) must remain */
    if (optind != argc - 1) {
        error_exit("Expecting one image file name");
    }
    filename = argv[optind++];

    /* Create all objects collected from --object before opening the image */
    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
                          NULL, &error_fatal)) {
        return 1;
    }

    flags = BDRV_O_RDWR | BDRV_O_UNMAP;
    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
    if (ret < 0) {
        error_report("Invalid cache option: %s", cache);
        return 1;
    }

    blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
                   false);
    if (!blk) {
        return 1;
    }
    bs = blk_bs(blk);

    /* From here on every exit path must go through 'done' (progress end) */
    qemu_progress_init(progress, 1.f);
    qemu_progress_print(0.f, 100);

    if (base) {
        base_bs = bdrv_find_backing_image(bs, base);
        if (!base_bs) {
            error_setg(&local_err,
                       "Did not find '%s' in the backing chain of '%s'",
                       base, filename);
            goto done;
        }
    } else {
        /* This is different from QMP, which by default uses the deepest file in
         * the backing chain (i.e., the very base); however, the traditional
         * behavior of qemu-img commit is using the immediate backing file. */
        base_bs = backing_bs(bs);
        if (!base_bs) {
            error_setg(&local_err, "Image does not have a backing file");
            goto done;
        }
    }

    /* Job failures reported via the callback end up in local_err */
    cbi = (CommonBlockJobCBInfo){
        .errp = &local_err,
        .bs   = bs,
    };

    aio_context = bdrv_get_aio_context(bs);
    aio_context_acquire(aio_context);
    commit_active_start("commit", bs, base_bs, JOB_DEFAULT, 0,
                        BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
                        &cbi, false, &local_err);
    aio_context_release(aio_context);
    if (local_err) {
        goto done;
    }

    /* When the block job completes, the BlockBackend reference will point to
     * the old backing file. To keep the top image from being deleted at that
     * point, so that we can still empty it afterwards, take an extra
     * reference on it here preemptively. */
    if (!drop) {
        bdrv_ref(bs);
    }

    /* Look the job up by the ID passed to commit_active_start() above */
    job = block_job_get("commit");
    assert(job);
    run_block_job(job, &local_err);
    if (local_err) {
        goto unref_backing;
    }

    /* Emptying the top image is skipped if the driver has no such callback */
    if (!drop && bs->drv->bdrv_make_empty) {
        ret = bs->drv->bdrv_make_empty(bs);
        if (ret) {
            error_setg_errno(&local_err, -ret, "Could not empty %s",
                             filename);
            goto unref_backing;
        }
    }

unref_backing:
    /* Drop the extra reference taken before running the job */
    if (!drop) {
        bdrv_unref(bs);
    }

done:
    qemu_progress_end();

    blk_unref(blk);

    if (local_err) {
        error_report_err(local_err);
        return 1;
    }

    qprintf(quiet, "Image committed.\n");
    return 0;
}
1079 
1080 /*
1081  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1082  * of the first sector boundary within buf where the sector contains a
1083  * non-zero byte.  This function is robust to a buffer that is not
1084  * sector-aligned.
1085  */
1086 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1087 {
1088     int64_t i;
1089     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1090 
1091     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1092         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1093             return i;
1094         }
1095     }
1096     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1097         return i;
1098     }
1099     return -1;
1100 }
1101 
1102 /*
1103  * Returns true iff the first sector pointed to by 'buf' contains at least
1104  * a non-NUL byte.
1105  *
1106  * 'pnum' is set to the number of sectors (including and immediately following
1107  * the first one) that are known to be in the same allocated/unallocated state.
1108  * The function will try to align the end offset to alignment boundaries so
1109  * that the request will at least end aligned and consequtive requests will
1110  * also start at an aligned offset.
1111  */
1112 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1113                                 int64_t sector_num, int alignment)
1114 {
1115     bool is_zero;
1116     int i, tail;
1117 
1118     if (n <= 0) {
1119         *pnum = 0;
1120         return 0;
1121     }
1122     is_zero = buffer_is_zero(buf, 512);
1123     for(i = 1; i < n; i++) {
1124         buf += 512;
1125         if (is_zero != buffer_is_zero(buf, 512)) {
1126             break;
1127         }
1128     }
1129 
1130     tail = (sector_num + i) & (alignment - 1);
1131     if (tail) {
1132         if (is_zero && i <= tail) {
1133             /* treat unallocated areas which only consist
1134              * of a small tail as allocated. */
1135             is_zero = false;
1136         }
1137         if (!is_zero) {
1138             /* align up end offset of allocated areas. */
1139             i += alignment - tail;
1140             i = MIN(i, n);
1141         } else {
1142             /* align down end offset of zero areas. */
1143             i -= tail;
1144         }
1145     }
1146     *pnum = i;
1147     return !is_zero;
1148 }
1149 
1150 /*
1151  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1152  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1153  * breaking up write requests for only small sparse areas.
1154  */
1155 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1156     int min, int64_t sector_num, int alignment)
1157 {
1158     int ret;
1159     int num_checked, num_used;
1160 
1161     if (n < min) {
1162         min = n;
1163     }
1164 
1165     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1166     if (!ret) {
1167         return ret;
1168     }
1169 
1170     num_used = *pnum;
1171     buf += BDRV_SECTOR_SIZE * *pnum;
1172     n -= *pnum;
1173     sector_num += *pnum;
1174     num_checked = num_used;
1175 
1176     while (n > 0) {
1177         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1178 
1179         buf += BDRV_SECTOR_SIZE * *pnum;
1180         n -= *pnum;
1181         sector_num += *pnum;
1182         num_checked += *pnum;
1183         if (ret) {
1184             num_used = num_checked;
1185         } else if (*pnum >= min) {
1186             break;
1187         }
1188     }
1189 
1190     *pnum = num_used;
1191     return 1;
1192 }
1193 
1194 /*
1195  * Compares two buffers sector by sector. Returns 0 if the first
1196  * sector of each buffer matches, non-zero otherwise.
1197  *
1198  * pnum is set to the sector-aligned size of the buffer prefix that
1199  * has the same matching status as the first sector.
1200  */
1201 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1202                            int64_t bytes, int64_t *pnum)
1203 {
1204     bool res;
1205     int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
1206 
1207     assert(bytes > 0);
1208 
1209     res = !!memcmp(buf1, buf2, i);
1210     while (i < bytes) {
1211         int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
1212 
1213         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1214             break;
1215         }
1216         i += len;
1217     }
1218 
1219     *pnum = i;
1220     return res;
1221 }
1222 
/*
 * Size of the I/O buffers allocated by img_compare(), which also caps the
 * amount of data it reads per request.
 */
#define IO_BUF_SIZE (2 * MiB)
1224 
1225 /*
1226  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1227  *
1228  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1229  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1230  * failure), and 4 on error (the exit status for read errors), after emitting
1231  * an error message.
1232  *
1233  * @param blk:  BlockBackend for the image
1234  * @param offset: Starting offset to check
1235  * @param bytes: Number of bytes to check
1236  * @param filename: Name of disk file we are checking (logging purpose)
1237  * @param buffer: Allocated buffer for storing read data
1238  * @param quiet: Flag for quiet mode
1239  */
1240 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1241                                int64_t bytes, const char *filename,
1242                                uint8_t *buffer, bool quiet)
1243 {
1244     int ret = 0;
1245     int64_t idx;
1246 
1247     ret = blk_pread(blk, offset, buffer, bytes);
1248     if (ret < 0) {
1249         error_report("Error while reading offset %" PRId64 " of %s: %s",
1250                      offset, filename, strerror(-ret));
1251         return 4;
1252     }
1253     idx = find_nonzero(buffer, bytes);
1254     if (idx >= 0) {
1255         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1256                 offset + idx);
1257         return 1;
1258     }
1259 
1260     return 0;
1261 }
1262 
/*
 * Compares two images. Exit codes:
 *
 * 0 - Images are identical
 * 1 - Images differ
 * >1 - Error occurred
 *
 * The error codes used in this function are:
 * 2 - invalid option value, or an image could not be opened
 * 3 - querying the block status of an image failed
 * 4 - reading from an image or getting its size failed
 *
 * Options handled here: -f / -F (format of first / second image), -T (cache
 * mode), -p (progress), -q (quiet), -s (strict), -U / --force-share,
 * --object, --image-opts, -h/--help.
 *
 * The common part of both images is walked in chunks bounded by the block
 * status of both images.  If the images differ in size, the remainder of the
 * larger image must read as zeroes for the images to count as identical.
 */
static int img_compare(int argc, char **argv)
{
    const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
    BlockBackend *blk1, *blk2;
    BlockDriverState *bs1, *bs2;
    int64_t total_size1, total_size2;
    uint8_t *buf1 = NULL, *buf2 = NULL;
    int64_t pnum1, pnum2;
    int allocated1, allocated2;
    int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
    bool progress = false, quiet = false, strict = false;
    int flags;
    bool writethrough;
    int64_t total_size;
    int64_t offset = 0;
    int64_t chunk;
    int c;
    uint64_t progress_base;
    bool image_opts = false;
    bool force_share = false;

    cache = BDRV_DEFAULT_CACHE;
    for (;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {"force-share", no_argument, 0, 'U'},
            {0, 0, 0, 0}
        };
        /* Leading ':' makes getopt return ':' for a missing option argument */
        c = getopt_long(argc, argv, ":hf:F:T:pqsU",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch (c) {
        case ':':
            missing_argument(argv[optind - 1]);
            break;
        case '?':
            unrecognized_option(argv[optind - 1]);
            break;
        case 'h':
            help();
            break;
        case 'f':
            fmt1 = optarg;
            break;
        case 'F':
            fmt2 = optarg;
            break;
        case 'T':
            cache = optarg;
            break;
        case 'p':
            progress = true;
            break;
        case 'q':
            quiet = true;
            break;
        case 's':
            strict = true;
            break;
        case 'U':
            force_share = true;
            break;
        case OPTION_OBJECT: {
            QemuOpts *opts;
            opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                           optarg, true);
            if (!opts) {
                ret = 2;
                goto out4;
            }
        }   break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        }
    }

    /* Progress is not shown in Quiet mode */
    if (quiet) {
        progress = false;
    }


    /* Exactly two positional arguments (the images) must remain */
    if (optind != argc - 2) {
        error_exit("Expecting two image file names");
    }
    filename1 = argv[optind++];
    filename2 = argv[optind++];

    /* Create all objects collected from --object before opening the images */
    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
                          NULL, &error_fatal)) {
        ret = 2;
        goto out4;
    }

    /* Initialize before goto out */
    qemu_progress_init(progress, 2.0);

    /* The same cache mode and flags are used for both images */
    flags = 0;
    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
    if (ret < 0) {
        error_report("Invalid source cache option: %s", cache);
        ret = 2;
        goto out3;
    }

    blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
                    force_share);
    if (!blk1) {
        ret = 2;
        goto out3;
    }

    blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
                    force_share);
    if (!blk2) {
        ret = 2;
        goto out2;
    }
    bs1 = blk_bs(blk1);
    bs2 = blk_bs(blk2);

    buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
    buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
    total_size1 = blk_getlength(blk1);
    if (total_size1 < 0) {
        error_report("Can't get size of %s: %s",
                     filename1, strerror(-total_size1));
        ret = 4;
        goto out;
    }
    total_size2 = blk_getlength(blk2);
    if (total_size2 < 0) {
        error_report("Can't get size of %s: %s",
                     filename2, strerror(-total_size2));
        ret = 4;
        goto out;
    }
    /* total_size: common part of both images; progress_base: larger size */
    total_size = MIN(total_size1, total_size2);
    progress_base = MAX(total_size1, total_size2);

    qemu_progress_print(0, 100);

    if (strict && total_size1 != total_size2) {
        ret = 1;
        qprintf(quiet, "Strict mode: Image size mismatch!\n");
        goto out;
    }

    /* Compare the part that exists in both images */
    while (offset < total_size) {
        int status1, status2;

        status1 = bdrv_block_status_above(bs1, NULL, offset,
                                          total_size1 - offset, &pnum1, NULL,
                                          NULL);
        if (status1 < 0) {
            ret = 3;
            error_report("Sector allocation test failed for %s", filename1);
            goto out;
        }
        allocated1 = status1 & BDRV_BLOCK_ALLOCATED;

        status2 = bdrv_block_status_above(bs2, NULL, offset,
                                          total_size2 - offset, &pnum2, NULL,
                                          NULL);
        if (status2 < 0) {
            ret = 3;
            error_report("Sector allocation test failed for %s", filename2);
            goto out;
        }
        allocated2 = status2 & BDRV_BLOCK_ALLOCATED;

        /* Only look at the range for which both statuses are valid */
        assert(pnum1 && pnum2);
        chunk = MIN(pnum1, pnum2);

        if (strict) {
            if (status1 != status2) {
                ret = 1;
                qprintf(quiet, "Strict mode: Offset %" PRId64
                        " block status mismatch!\n", offset);
                goto out;
            }
        }
        if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
            /* nothing to do */
        } else if (allocated1 == allocated2) {
            /* Both unallocated: nothing to compare. Both allocated: read. */
            if (allocated1) {
                int64_t pnum;

                chunk = MIN(chunk, IO_BUF_SIZE);
                ret = blk_pread(blk1, offset, buf1, chunk);
                if (ret < 0) {
                    error_report("Error while reading offset %" PRId64
                                 " of %s: %s",
                                 offset, filename1, strerror(-ret));
                    ret = 4;
                    goto out;
                }
                ret = blk_pread(blk2, offset, buf2, chunk);
                if (ret < 0) {
                    error_report("Error while reading offset %" PRId64
                                 " of %s: %s",
                                 offset, filename2, strerror(-ret));
                    ret = 4;
                    goto out;
                }
                ret = compare_buffers(buf1, buf2, chunk, &pnum);
                if (ret || pnum != chunk) {
                    /* Mismatch is at the start (ret) or after pnum equal bytes */
                    qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
                            offset + (ret ? 0 : pnum));
                    ret = 1;
                    goto out;
                }
            }
        } else {
            /* Allocated in only one image: that image must read as zeroes */
            chunk = MIN(chunk, IO_BUF_SIZE);
            if (allocated1) {
                ret = check_empty_sectors(blk1, offset, chunk,
                                          filename1, buf1, quiet);
            } else {
                ret = check_empty_sectors(blk2, offset, chunk,
                                          filename2, buf1, quiet);
            }
            if (ret) {
                goto out;
            }
        }
        offset += chunk;
        qemu_progress_print(((float) chunk / progress_base) * 100, 100);
    }

    /* Sizes differ: the rest of the larger image must read as zeroes */
    if (total_size1 != total_size2) {
        BlockBackend *blk_over;
        const char *filename_over;

        qprintf(quiet, "Warning: Image size mismatch!\n");
        if (total_size1 > total_size2) {
            blk_over = blk1;
            filename_over = filename1;
        } else {
            blk_over = blk2;
            filename_over = filename2;
        }

        while (offset < progress_base) {
            ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
                                          progress_base - offset, &chunk,
                                          NULL, NULL);
            if (ret < 0) {
                ret = 3;
                error_report("Sector allocation test failed for %s",
                             filename_over);
                goto out;

            }
            /* Only allocated ranges not already known to be zero are read */
            if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
                chunk = MIN(chunk, IO_BUF_SIZE);
                ret = check_empty_sectors(blk_over, offset, chunk,
                                          filename_over, buf1, quiet);
                if (ret) {
                    goto out;
                }
            }
            offset += chunk;
            qemu_progress_print(((float) chunk / progress_base) * 100, 100);
        }
    }

    qprintf(quiet, "Images are identical.\n");
    ret = 0;

    /* Cleanup labels unwind in reverse order of resource acquisition */
out:
    qemu_vfree(buf1);
    qemu_vfree(buf2);
    blk_unref(blk2);
out2:
    blk_unref(blk1);
out3:
    qemu_progress_end();
out4:
    return ret;
}
1557 
/*
 * Classification of a source range during convert, as determined by
 * convert_iteration_sectors() and acted upon by convert_co_write().
 */
enum ImgConvertBlockStatus {
    /* Data is written to the target, unless it turns out to be zero */
    BLK_DATA,
    /* Range reads as zeroes; zeroes are written unless has_zero_init is set */
    BLK_ZERO,
    /* Left unallocated in the target so that its backing file shows through */
    BLK_BACKING_FILE,
};
1563 
/* Size of the per-coroutine arrays (co[], wait_sector_num[]) in ImgConvertState */
#define MAX_COROUTINES 16
1565 
/*
 * State shared by the convert helpers.
 *
 * NOTE(review): fields without a comment below are not used in the helpers
 * visible next to this definition; check their users before relying on any
 * particular meaning.
 */
typedef struct ImgConvertState {
    /* Source images; a sector number is mapped to one by convert_select_part() */
    BlockBackend **src;
    /* Length in sectors of each entry of src[] */
    int64_t *src_sectors;
    /* Number of entries in src[] and src_sectors[] */
    int src_num;
    /* Upper bound (exclusive) for sector numbers being converted */
    int64_t total_sectors;
    int64_t allocated_sectors;
    int64_t allocated_done;
    int64_t sector_num;
    int64_t wr_offs;
    /* Status determined by the last block status query */
    enum ImgConvertBlockStatus status;
    /* 'status' applies to sectors below this one; at or beyond it, re-query */
    int64_t sector_next_status;
    /* Destination image */
    BlockBackend *target;
    /* If set, BLK_ZERO ranges are skipped instead of being written */
    bool has_zero_init;
    /* Write with BDRV_REQ_WRITE_COMPRESSED, in units of whole clusters */
    bool compressed;
    /* Decides how zero ranges past the end of the target's backing file are treated */
    bool unallocated_blocks_are_zero;
    bool target_is_new;
    /* Selects whether block status is queried for the top layer only */
    bool target_has_backing;
    int64_t target_backing_sectors; /* negative if unknown */
    bool wr_in_order;
    bool copy_range;
    /* Keep going after read or block status errors instead of failing */
    bool salvage;
    /* Suppress the warnings printed in salvage mode */
    bool quiet;
    /* Minimum zero run (in sectors) kept sparse; 0 means always write data */
    int min_sparse;
    /* Alignment in sectors passed to is_allocated_sectors_min() */
    int alignment;
    /* Granularity in sectors for iterations when 'compressed' is set */
    size_t cluster_sectors;
    /* Maximum number of sectors handled per data iteration / read call */
    size_t buf_sectors;
    /* Number of entries of co[] that are in use */
    long num_coroutines;
    int running_coroutines;
    Coroutine *co[MAX_COROUTINES];
    int64_t wait_sector_num[MAX_COROUTINES];
    CoMutex lock;
    int ret;
} ImgConvertState;
1599 
1600 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1601                                 int *src_cur, int64_t *src_cur_offset)
1602 {
1603     *src_cur = 0;
1604     *src_cur_offset = 0;
1605     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1606         *src_cur_offset += s->src_sectors[*src_cur];
1607         (*src_cur)++;
1608         assert(*src_cur < s->src_num);
1609     }
1610 }
1611 
/*
 * Determine how many sectors, starting at @sector_num, the next convert
 * iteration should handle, and what kind of range that is.
 *
 * If the cached status in @s does not cover @sector_num, the block status of
 * the source is queried and s->status / s->sector_next_status are updated.
 *
 * Returns the number of sectors for this iteration, or a negative value
 * (as returned by the block status query) if the query failed and salvage
 * mode is not enabled.
 */
static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
{
    int64_t src_cur_offset;
    int ret, n, src_cur;
    bool post_backing_zero = false;

    convert_select_part(s, sector_num, &src_cur, &src_cur_offset);

    assert(s->total_sectors > sector_num);
    n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);

    if (s->target_backing_sectors >= 0) {
        if (sector_num >= s->target_backing_sectors) {
            post_backing_zero = s->unallocated_blocks_are_zero;
        } else if (sector_num + n > s->target_backing_sectors) {
            /* Split requests around target_backing_sectors (because
             * starting from there, zeros are handled differently) */
            n = s->target_backing_sectors - sector_num;
        }
    }

    /* Only query again once the previously determined status is used up */
    if (s->sector_next_status <= sector_num) {
        uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
        int64_t count;

        do {
            count = n * BDRV_SECTOR_SIZE;

            /* With a backing file on the target, only the top layer of the
             * source is examined; otherwise the whole backing chain is. */
            if (s->target_has_backing) {
                ret = bdrv_block_status(blk_bs(s->src[src_cur]), offset,
                                        count, &count, NULL, NULL);
            } else {
                ret = bdrv_block_status_above(blk_bs(s->src[src_cur]), NULL,
                                              offset, count, &count, NULL,
                                              NULL);
            }

            if (ret < 0) {
                if (s->salvage) {
                    if (n == 1) {
                        if (!s->quiet) {
                            warn_report("error while reading block status at "
                                        "offset %" PRIu64 ": %s", offset,
                                        strerror(-ret));
                        }
                        /* Just try to read the data, then */
                        ret = BDRV_BLOCK_DATA;
                        count = BDRV_SECTOR_SIZE;
                    } else {
                        /* Retry on a shorter range */
                        n = DIV_ROUND_UP(n, 4);
                    }
                } else {
                    error_report("error while reading block status at offset "
                                 "%" PRIu64 ": %s", offset, strerror(-ret));
                    return ret;
                }
            }
        } while (ret < 0);

        /* The query reports in bytes; convert back to (whole) sectors */
        n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);

        if (ret & BDRV_BLOCK_ZERO) {
            s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
        } else if (ret & BDRV_BLOCK_DATA) {
            s->status = BLK_DATA;
        } else {
            /* Neither zero nor data */
            s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
        }

        s->sector_next_status = sector_num + n;
    }

    n = MIN(n, s->sector_next_status - sector_num);
    /* Data has to fit into the I/O buffer */
    if (s->status == BLK_DATA) {
        n = MIN(n, s->buf_sectors);
    }

    /* We need to write complete clusters for compressed images, so if an
     * unallocated area is shorter than that, we must consider the whole
     * cluster allocated. */
    if (s->compressed) {
        if (n < s->cluster_sectors) {
            n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
            s->status = BLK_DATA;
        } else {
            n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
        }
    }

    return n;
}
1704 
/*
 * Read @nb_sectors sectors starting at @sector_num from the source image(s)
 * into @buf.  @nb_sectors must not exceed s->buf_sectors.
 *
 * In salvage mode a failed multi-sector read is retried one sector at a
 * time; sectors that still cannot be read are zero-filled in @buf (with a
 * warning unless quiet) and the read continues.
 *
 * Returns 0 on success, or the negative error code of the failed read when
 * not in salvage mode.
 */
static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
                                        int nb_sectors, uint8_t *buf)
{
    /* Below this source byte offset, reads are done sector by sector */
    uint64_t single_read_until = 0;
    int n, ret;

    assert(nb_sectors <= s->buf_sectors);
    while (nb_sectors > 0) {
        BlockBackend *blk;
        int src_cur;
        int64_t bs_sectors, src_cur_offset;
        uint64_t offset;

        /* In the case of compression with multiple source files, we can get a
         * nb_sectors that spreads into the next part. So we must be able to
         * read across multiple BDSes for one convert_read() call. */
        convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
        blk = s->src[src_cur];
        bs_sectors = s->src_sectors[src_cur];

        offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;

        /* Do not read past the end of the current source image */
        n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
        if (single_read_until > offset) {
            n = 1;
        }

        ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
        if (ret < 0) {
            if (s->salvage) {
                if (n > 1) {
                    /* Retry the failed range one sector at a time */
                    single_read_until = offset + (n << BDRV_SECTOR_BITS);
                    continue;
                } else {
                    if (!s->quiet) {
                        warn_report("error while reading offset %" PRIu64
                                    ": %s", offset, strerror(-ret));
                    }
                    /* Substitute zeroes for the unreadable sector */
                    memset(buf, 0, BDRV_SECTOR_SIZE);
                }
            } else {
                return ret;
            }
        }

        sector_num += n;
        nb_sectors -= n;
        buf += n * BDRV_SECTOR_SIZE;
    }

    return 0;
}
1757 
1758 
/*
 * Write @nb_sectors sectors starting at @sector_num to the target, handling
 * them according to @status:
 *
 *   BLK_BACKING_FILE - nothing is written
 *   BLK_DATA         - @buf is written, except for parts detected as zero,
 *                      which are handled like BLK_ZERO
 *   BLK_ZERO         - zeroes are written (allowing unmap), unless the
 *                      target has zero init, in which case nothing is done
 *
 * @buf holds the data for the range.
 *
 * Returns 0 on success or the negative error code of the failed write.
 */
static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
                                         int nb_sectors, uint8_t *buf,
                                         enum ImgConvertBlockStatus status)
{
    int ret;

    while (nb_sectors > 0) {
        /* May be shortened by is_allocated_sectors_min() below */
        int n = nb_sectors;
        BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;

        switch (status) {
        case BLK_BACKING_FILE:
            /* If we have a backing file, leave clusters unallocated that are
             * unallocated in the source image, so that the backing file is
             * visible at the respective offset. */
            assert(s->target_has_backing);
            break;

        case BLK_DATA:
            /* If we're told to keep the target fully allocated (-S 0) or there
             * is real non-zero data, we must write it. Otherwise we can treat
             * it as zero sectors.
             * Compressed clusters need to be written as a whole, so in that
             * case we can only save the write if the buffer is completely
             * zeroed. */
            if (!s->min_sparse ||
                (!s->compressed &&
                 is_allocated_sectors_min(buf, n, &n, s->min_sparse,
                                          sector_num, s->alignment)) ||
                (s->compressed &&
                 !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
            {
                ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
                                    n << BDRV_SECTOR_BITS, buf, flags);
                if (ret < 0) {
                    return ret;
                }
                break;
            }
            /* fall-through */

        case BLK_ZERO:
            if (s->has_zero_init) {
                /* Nothing to write in this case */
                assert(!s->target_has_backing);
                break;
            }
            ret = blk_co_pwrite_zeroes(s->target,
                                       sector_num << BDRV_SECTOR_BITS,
                                       n << BDRV_SECTOR_BITS,
                                       BDRV_REQ_MAY_UNMAP);
            if (ret < 0) {
                return ret;
            }
            break;
        }

        /* Advance by what was actually handled in this round */
        sector_num += n;
        nb_sectors -= n;
        buf += n * BDRV_SECTOR_SIZE;
    }

    return 0;
}
1822 
1823 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1824                                               int nb_sectors)
1825 {
1826     int n, ret;
1827 
1828     while (nb_sectors > 0) {
1829         BlockBackend *blk;
1830         int src_cur;
1831         int64_t bs_sectors, src_cur_offset;
1832         int64_t offset;
1833 
1834         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1835         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1836         blk = s->src[src_cur];
1837         bs_sectors = s->src_sectors[src_cur];
1838 
1839         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1840 
1841         ret = blk_co_copy_range(blk, offset, s->target,
1842                                 sector_num << BDRV_SECTOR_BITS,
1843                                 n << BDRV_SECTOR_BITS, 0, 0);
1844         if (ret < 0) {
1845             return ret;
1846         }
1847 
1848         sector_num += n;
1849         nb_sectors -= n;
1850     }
1851     return 0;
1852 }
1853 
1854 static void coroutine_fn convert_co_do_copy(void *opaque)
1855 {
1856     ImgConvertState *s = opaque;
1857     uint8_t *buf = NULL;
1858     int ret, i;
1859     int index = -1;
1860 
1861     for (i = 0; i < s->num_coroutines; i++) {
1862         if (s->co[i] == qemu_coroutine_self()) {
1863             index = i;
1864             break;
1865         }
1866     }
1867     assert(index >= 0);
1868 
1869     s->running_coroutines++;
1870     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1871 
1872     while (1) {
1873         int n;
1874         int64_t sector_num;
1875         enum ImgConvertBlockStatus status;
1876         bool copy_range;
1877 
1878         qemu_co_mutex_lock(&s->lock);
1879         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1880             qemu_co_mutex_unlock(&s->lock);
1881             break;
1882         }
1883         n = convert_iteration_sectors(s, s->sector_num);
1884         if (n < 0) {
1885             qemu_co_mutex_unlock(&s->lock);
1886             s->ret = n;
1887             break;
1888         }
1889         /* save current sector and allocation status to local variables */
1890         sector_num = s->sector_num;
1891         status = s->status;
1892         if (!s->min_sparse && s->status == BLK_ZERO) {
1893             n = MIN(n, s->buf_sectors);
1894         }
1895         /* increment global sector counter so that other coroutines can
1896          * already continue reading beyond this request */
1897         s->sector_num += n;
1898         qemu_co_mutex_unlock(&s->lock);
1899 
1900         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
1901             s->allocated_done += n;
1902             qemu_progress_print(100.0 * s->allocated_done /
1903                                         s->allocated_sectors, 0);
1904         }
1905 
1906 retry:
1907         copy_range = s->copy_range && s->status == BLK_DATA;
1908         if (status == BLK_DATA && !copy_range) {
1909             ret = convert_co_read(s, sector_num, n, buf);
1910             if (ret < 0) {
1911                 error_report("error while reading sector %" PRId64
1912                              ": %s", sector_num, strerror(-ret));
1913                 s->ret = ret;
1914             }
1915         } else if (!s->min_sparse && status == BLK_ZERO) {
1916             status = BLK_DATA;
1917             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
1918         }
1919 
1920         if (s->wr_in_order) {
1921             /* keep writes in order */
1922             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
1923                 s->wait_sector_num[index] = sector_num;
1924                 qemu_coroutine_yield();
1925             }
1926             s->wait_sector_num[index] = -1;
1927         }
1928 
1929         if (s->ret == -EINPROGRESS) {
1930             if (copy_range) {
1931                 ret = convert_co_copy_range(s, sector_num, n);
1932                 if (ret) {
1933                     s->copy_range = false;
1934                     goto retry;
1935                 }
1936             } else {
1937                 ret = convert_co_write(s, sector_num, n, buf, status);
1938             }
1939             if (ret < 0) {
1940                 error_report("error while writing sector %" PRId64
1941                              ": %s", sector_num, strerror(-ret));
1942                 s->ret = ret;
1943             }
1944         }
1945 
1946         if (s->wr_in_order) {
1947             /* reenter the coroutine that might have waited
1948              * for this write to complete */
1949             s->wr_offs = sector_num + n;
1950             for (i = 0; i < s->num_coroutines; i++) {
1951                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
1952                     /*
1953                      * A -> B -> A cannot occur because A has
1954                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
1955                      * B will never enter A during this time window.
1956                      */
1957                     qemu_coroutine_enter(s->co[i]);
1958                     break;
1959                 }
1960             }
1961         }
1962     }
1963 
1964     qemu_vfree(buf);
1965     s->co[index] = NULL;
1966     s->running_coroutines--;
1967     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
1968         /* the convert job finished successfully */
1969         s->ret = 0;
1970     }
1971 }
1972 
1973 static int convert_do_copy(ImgConvertState *s)
1974 {
1975     int ret, i, n;
1976     int64_t sector_num = 0;
1977 
1978     /* Check whether we have zero initialisation or can get it efficiently */
1979     if (s->target_is_new && s->min_sparse && !s->target_has_backing) {
1980         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
1981     } else {
1982         s->has_zero_init = false;
1983     }
1984 
1985     if (!s->has_zero_init && !s->target_has_backing &&
1986         bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
1987     {
1988         ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
1989         if (ret == 0) {
1990             s->has_zero_init = true;
1991         }
1992     }
1993 
1994     /* Allocate buffer for copied data. For compressed images, only one cluster
1995      * can be copied at a time. */
1996     if (s->compressed) {
1997         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
1998             error_report("invalid cluster size");
1999             return -EINVAL;
2000         }
2001         s->buf_sectors = s->cluster_sectors;
2002     }
2003 
2004     while (sector_num < s->total_sectors) {
2005         n = convert_iteration_sectors(s, sector_num);
2006         if (n < 0) {
2007             return n;
2008         }
2009         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2010         {
2011             s->allocated_sectors += n;
2012         }
2013         sector_num += n;
2014     }
2015 
2016     /* Do the copy */
2017     s->sector_next_status = 0;
2018     s->ret = -EINPROGRESS;
2019 
2020     qemu_co_mutex_init(&s->lock);
2021     for (i = 0; i < s->num_coroutines; i++) {
2022         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2023         s->wait_sector_num[i] = -1;
2024         qemu_coroutine_enter(s->co[i]);
2025     }
2026 
2027     while (s->running_coroutines) {
2028         main_loop_wait(false);
2029     }
2030 
2031     if (s->compressed && !s->ret) {
2032         /* signal EOF to align */
2033         ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
2034         if (ret < 0) {
2035             return ret;
2036         }
2037     }
2038 
2039     return s->ret;
2040 }
2041 
2042 #define MAX_BUF_SECTORS 32768
2043 
2044 static int img_convert(int argc, char **argv)
2045 {
2046     int c, bs_i, flags, src_flags = 0;
2047     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2048                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2049                *out_filename, *out_baseimg_param, *snapshot_name = NULL;
2050     BlockDriver *drv = NULL, *proto_drv = NULL;
2051     BlockDriverInfo bdi;
2052     BlockDriverState *out_bs;
2053     QemuOpts *opts = NULL, *sn_opts = NULL;
2054     QemuOptsList *create_opts = NULL;
2055     QDict *open_opts = NULL;
2056     char *options = NULL;
2057     Error *local_err = NULL;
2058     bool writethrough, src_writethrough, image_opts = false,
2059          skip_create = false, progress = false, tgt_image_opts = false;
2060     int64_t ret = -EINVAL;
2061     bool force_share = false;
2062     bool explict_min_sparse = false;
2063 
2064     ImgConvertState s = (ImgConvertState) {
2065         /* Need at least 4k of zeros for sparse detection */
2066         .min_sparse         = 8,
2067         .copy_range         = false,
2068         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2069         .wr_in_order        = true,
2070         .num_coroutines     = 8,
2071     };
2072 
2073     for(;;) {
2074         static const struct option long_options[] = {
2075             {"help", no_argument, 0, 'h'},
2076             {"object", required_argument, 0, OPTION_OBJECT},
2077             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2078             {"force-share", no_argument, 0, 'U'},
2079             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2080             {"salvage", no_argument, 0, OPTION_SALVAGE},
2081             {0, 0, 0, 0}
2082         };
2083         c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU",
2084                         long_options, NULL);
2085         if (c == -1) {
2086             break;
2087         }
2088         switch(c) {
2089         case ':':
2090             missing_argument(argv[optind - 1]);
2091             break;
2092         case '?':
2093             unrecognized_option(argv[optind - 1]);
2094             break;
2095         case 'h':
2096             help();
2097             break;
2098         case 'f':
2099             fmt = optarg;
2100             break;
2101         case 'O':
2102             out_fmt = optarg;
2103             break;
2104         case 'B':
2105             out_baseimg = optarg;
2106             break;
2107         case 'C':
2108             s.copy_range = true;
2109             break;
2110         case 'c':
2111             s.compressed = true;
2112             break;
2113         case 'o':
2114             if (!is_valid_option_list(optarg)) {
2115                 error_report("Invalid option list: %s", optarg);
2116                 goto fail_getopt;
2117             }
2118             if (!options) {
2119                 options = g_strdup(optarg);
2120             } else {
2121                 char *old_options = options;
2122                 options = g_strdup_printf("%s,%s", options, optarg);
2123                 g_free(old_options);
2124             }
2125             break;
2126         case 'l':
2127             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2128                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2129                                                   optarg, false);
2130                 if (!sn_opts) {
2131                     error_report("Failed in parsing snapshot param '%s'",
2132                                  optarg);
2133                     goto fail_getopt;
2134                 }
2135             } else {
2136                 snapshot_name = optarg;
2137             }
2138             break;
2139         case 'S':
2140         {
2141             int64_t sval;
2142 
2143             sval = cvtnum(optarg);
2144             if (sval < 0 || !QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2145                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2146                 error_report("Invalid buffer size for sparse output specified. "
2147                     "Valid sizes are multiples of %llu up to %llu. Select "
2148                     "0 to disable sparse detection (fully allocates output).",
2149                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2150                 goto fail_getopt;
2151             }
2152 
2153             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2154             explict_min_sparse = true;
2155             break;
2156         }
2157         case 'p':
2158             progress = true;
2159             break;
2160         case 't':
2161             cache = optarg;
2162             break;
2163         case 'T':
2164             src_cache = optarg;
2165             break;
2166         case 'q':
2167             s.quiet = true;
2168             break;
2169         case 'n':
2170             skip_create = true;
2171             break;
2172         case 'm':
2173             if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2174                 s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2175                 error_report("Invalid number of coroutines. Allowed number of"
2176                              " coroutines is between 1 and %d", MAX_COROUTINES);
2177                 goto fail_getopt;
2178             }
2179             break;
2180         case 'W':
2181             s.wr_in_order = false;
2182             break;
2183         case 'U':
2184             force_share = true;
2185             break;
2186         case OPTION_OBJECT: {
2187             QemuOpts *object_opts;
2188             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
2189                                                   optarg, true);
2190             if (!object_opts) {
2191                 goto fail_getopt;
2192             }
2193             break;
2194         }
2195         case OPTION_IMAGE_OPTS:
2196             image_opts = true;
2197             break;
2198         case OPTION_SALVAGE:
2199             s.salvage = true;
2200             break;
2201         case OPTION_TARGET_IMAGE_OPTS:
2202             tgt_image_opts = true;
2203             break;
2204         }
2205     }
2206 
2207     if (!out_fmt && !tgt_image_opts) {
2208         out_fmt = "raw";
2209     }
2210 
2211     if (qemu_opts_foreach(&qemu_object_opts,
2212                           user_creatable_add_opts_foreach,
2213                           NULL, &error_fatal)) {
2214         goto fail_getopt;
2215     }
2216 
2217     if (s.compressed && s.copy_range) {
2218         error_report("Cannot enable copy offloading when -c is used");
2219         goto fail_getopt;
2220     }
2221 
2222     if (explict_min_sparse && s.copy_range) {
2223         error_report("Cannot enable copy offloading when -S is used");
2224         goto fail_getopt;
2225     }
2226 
2227     if (s.copy_range && s.salvage) {
2228         error_report("Cannot use copy offloading in salvaging mode");
2229         goto fail_getopt;
2230     }
2231 
2232     if (tgt_image_opts && !skip_create) {
2233         error_report("--target-image-opts requires use of -n flag");
2234         goto fail_getopt;
2235     }
2236 
2237     if (skip_create && options) {
2238         warn_report("-o has no effect when skipping image creation");
2239         warn_report("This will become an error in future QEMU versions.");
2240     }
2241 
2242     s.src_num = argc - optind - 1;
2243     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2244 
2245     if (options && has_help_option(options)) {
2246         if (out_fmt) {
2247             ret = print_block_option_help(out_filename, out_fmt);
2248             goto fail_getopt;
2249         } else {
2250             error_report("Option help requires a format be specified");
2251             goto fail_getopt;
2252         }
2253     }
2254 
2255     if (s.src_num < 1) {
2256         error_report("Must specify image file name");
2257         goto fail_getopt;
2258     }
2259 
2260 
2261     /* ret is still -EINVAL until here */
2262     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2263     if (ret < 0) {
2264         error_report("Invalid source cache option: %s", src_cache);
2265         goto fail_getopt;
2266     }
2267 
2268     /* Initialize before goto out */
2269     if (s.quiet) {
2270         progress = false;
2271     }
2272     qemu_progress_init(progress, 1.0);
2273     qemu_progress_print(0, 100);
2274 
2275     s.src = g_new0(BlockBackend *, s.src_num);
2276     s.src_sectors = g_new(int64_t, s.src_num);
2277 
2278     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2279         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2280                                fmt, src_flags, src_writethrough, s.quiet,
2281                                force_share);
2282         if (!s.src[bs_i]) {
2283             ret = -1;
2284             goto out;
2285         }
2286         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2287         if (s.src_sectors[bs_i] < 0) {
2288             error_report("Could not get size of %s: %s",
2289                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2290             ret = -1;
2291             goto out;
2292         }
2293         s.total_sectors += s.src_sectors[bs_i];
2294     }
2295 
2296     if (sn_opts) {
2297         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2298                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2299                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2300                                &local_err);
2301     } else if (snapshot_name != NULL) {
2302         if (s.src_num > 1) {
2303             error_report("No support for concatenating multiple snapshot");
2304             ret = -1;
2305             goto out;
2306         }
2307 
2308         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2309                                              &local_err);
2310     }
2311     if (local_err) {
2312         error_reportf_err(local_err, "Failed to load snapshot: ");
2313         ret = -1;
2314         goto out;
2315     }
2316 
2317     if (!skip_create) {
2318         /* Find driver and parse its options */
2319         drv = bdrv_find_format(out_fmt);
2320         if (!drv) {
2321             error_report("Unknown file format '%s'", out_fmt);
2322             ret = -1;
2323             goto out;
2324         }
2325 
2326         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2327         if (!proto_drv) {
2328             error_report_err(local_err);
2329             ret = -1;
2330             goto out;
2331         }
2332 
2333         if (!drv->create_opts) {
2334             error_report("Format driver '%s' does not support image creation",
2335                          drv->format_name);
2336             ret = -1;
2337             goto out;
2338         }
2339 
2340         if (!proto_drv->create_opts) {
2341             error_report("Protocol driver '%s' does not support image creation",
2342                          proto_drv->format_name);
2343             ret = -1;
2344             goto out;
2345         }
2346 
2347         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2348         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2349 
2350         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2351         if (options) {
2352             qemu_opts_do_parse(opts, options, NULL, &local_err);
2353             if (local_err) {
2354                 error_report_err(local_err);
2355                 ret = -1;
2356                 goto out;
2357             }
2358         }
2359 
2360         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s.total_sectors * 512,
2361                             &error_abort);
2362         ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
2363         if (ret < 0) {
2364             goto out;
2365         }
2366     }
2367 
2368     /* Get backing file name if -o backing_file was used */
2369     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2370     if (out_baseimg_param) {
2371         out_baseimg = out_baseimg_param;
2372     }
2373     s.target_has_backing = (bool) out_baseimg;
2374 
2375     if (s.src_num > 1 && out_baseimg) {
2376         error_report("Having a backing file for the target makes no sense when "
2377                      "concatenating multiple input images");
2378         ret = -1;
2379         goto out;
2380     }
2381 
2382     /* Check if compression is supported */
2383     if (s.compressed) {
2384         bool encryption =
2385             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2386         const char *encryptfmt =
2387             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2388         const char *preallocation =
2389             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2390 
2391         if (drv && !block_driver_can_compress(drv)) {
2392             error_report("Compression not supported for this file format");
2393             ret = -1;
2394             goto out;
2395         }
2396 
2397         if (encryption || encryptfmt) {
2398             error_report("Compression and encryption not supported at "
2399                          "the same time");
2400             ret = -1;
2401             goto out;
2402         }
2403 
2404         if (preallocation
2405             && strcmp(preallocation, "off"))
2406         {
2407             error_report("Compression and preallocation not supported at "
2408                          "the same time");
2409             ret = -1;
2410             goto out;
2411         }
2412     }
2413 
2414     /*
2415      * The later open call will need any decryption secrets, and
2416      * bdrv_create() will purge "opts", so extract them now before
2417      * they are lost.
2418      */
2419     if (!skip_create) {
2420         open_opts = qdict_new();
2421         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2422     }
2423 
2424     if (!skip_create) {
2425         /* Create the new image */
2426         ret = bdrv_create(drv, out_filename, opts, &local_err);
2427         if (ret < 0) {
2428             error_reportf_err(local_err, "%s: error while converting %s: ",
2429                               out_filename, out_fmt);
2430             goto out;
2431         }
2432     }
2433 
2434     s.target_is_new = !skip_create;
2435 
2436     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2437     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2438     if (ret < 0) {
2439         error_report("Invalid cache option: %s", cache);
2440         goto out;
2441     }
2442 
2443     if (skip_create) {
2444         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2445                             flags, writethrough, s.quiet, false);
2446     } else {
2447         /* TODO ultimately we should allow --target-image-opts
2448          * to be used even when -n is not given.
2449          * That has to wait for bdrv_create to be improved
2450          * to allow filenames in option syntax
2451          */
2452         s.target = img_open_file(out_filename, open_opts, out_fmt,
2453                                  flags, writethrough, s.quiet, false);
2454         open_opts = NULL; /* blk_new_open will have freed it */
2455     }
2456     if (!s.target) {
2457         ret = -1;
2458         goto out;
2459     }
2460     out_bs = blk_bs(s.target);
2461 
2462     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2463         error_report("Compression not supported for this file format");
2464         ret = -1;
2465         goto out;
2466     }
2467 
2468     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2469      * or discard_alignment of the out_bs is greater. Limit to
2470      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2471     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2472                         MAX(s.buf_sectors,
2473                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2474                                 out_bs->bl.pdiscard_alignment >>
2475                                 BDRV_SECTOR_BITS)));
2476 
2477     /* try to align the write requests to the destination to avoid unnecessary
2478      * RMW cycles. */
2479     s.alignment = MAX(pow2floor(s.min_sparse),
2480                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2481                                    BDRV_SECTOR_SIZE));
2482     assert(is_power_of_2(s.alignment));
2483 
2484     if (skip_create) {
2485         int64_t output_sectors = blk_nb_sectors(s.target);
2486         if (output_sectors < 0) {
2487             error_report("unable to get output image length: %s",
2488                          strerror(-output_sectors));
2489             ret = -1;
2490             goto out;
2491         } else if (output_sectors < s.total_sectors) {
2492             error_report("output file is smaller than input file");
2493             ret = -1;
2494             goto out;
2495         }
2496     }
2497 
2498     if (s.target_has_backing) {
2499         /* Errors are treated as "backing length unknown" (which means
2500          * s.target_backing_sectors has to be negative, which it will
2501          * be automatically).  The backing file length is used only
2502          * for optimizations, so such a case is not fatal. */
2503         s.target_backing_sectors = bdrv_nb_sectors(out_bs->backing->bs);
2504     } else {
2505         s.target_backing_sectors = -1;
2506     }
2507 
2508     ret = bdrv_get_info(out_bs, &bdi);
2509     if (ret < 0) {
2510         if (s.compressed) {
2511             error_report("could not get block driver info");
2512             goto out;
2513         }
2514     } else {
2515         s.compressed = s.compressed || bdi.needs_compressed_writes;
2516         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2517         s.unallocated_blocks_are_zero = bdi.unallocated_blocks_are_zero;
2518     }
2519 
2520     ret = convert_do_copy(&s);
2521 out:
2522     if (!ret) {
2523         qemu_progress_print(100, 0);
2524     }
2525     qemu_progress_end();
2526     qemu_opts_del(opts);
2527     qemu_opts_free(create_opts);
2528     qemu_opts_del(sn_opts);
2529     qobject_unref(open_opts);
2530     blk_unref(s.target);
2531     if (s.src) {
2532         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2533             blk_unref(s.src[bs_i]);
2534         }
2535         g_free(s.src);
2536     }
2537     g_free(s.src_sectors);
2538 fail_getopt:
2539     g_free(options);
2540 
2541     return !!ret;
2542 }
2543 
2544 
2545 static void dump_snapshots(BlockDriverState *bs)
2546 {
2547     QEMUSnapshotInfo *sn_tab, *sn;
2548     int nb_sns, i;
2549 
2550     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2551     if (nb_sns <= 0)
2552         return;
2553     printf("Snapshot list:\n");
2554     bdrv_snapshot_dump(NULL);
2555     printf("\n");
2556     for(i = 0; i < nb_sns; i++) {
2557         sn = &sn_tab[i];
2558         bdrv_snapshot_dump(sn);
2559         printf("\n");
2560     }
2561     g_free(sn_tab);
2562 }
2563 
2564 static void dump_json_image_info_list(ImageInfoList *list)
2565 {
2566     QString *str;
2567     QObject *obj;
2568     Visitor *v = qobject_output_visitor_new(&obj);
2569 
2570     visit_type_ImageInfoList(v, NULL, &list, &error_abort);
2571     visit_complete(v, &obj);
2572     str = qobject_to_json_pretty(obj);
2573     assert(str != NULL);
2574     printf("%s\n", qstring_get_str(str));
2575     qobject_unref(obj);
2576     visit_free(v);
2577     qobject_unref(str);
2578 }
2579 
2580 static void dump_json_image_info(ImageInfo *info)
2581 {
2582     QString *str;
2583     QObject *obj;
2584     Visitor *v = qobject_output_visitor_new(&obj);
2585 
2586     visit_type_ImageInfo(v, NULL, &info, &error_abort);
2587     visit_complete(v, &obj);
2588     str = qobject_to_json_pretty(obj);
2589     assert(str != NULL);
2590     printf("%s\n", qstring_get_str(str));
2591     qobject_unref(obj);
2592     visit_free(v);
2593     qobject_unref(str);
2594 }
2595 
2596 static void dump_human_image_info_list(ImageInfoList *list)
2597 {
2598     ImageInfoList *elem;
2599     bool delim = false;
2600 
2601     for (elem = list; elem; elem = elem->next) {
2602         if (delim) {
2603             printf("\n");
2604         }
2605         delim = true;
2606 
2607         bdrv_image_info_dump(elem->value);
2608     }
2609 }
2610 
2611 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2612 {
2613     return strcmp(a, b) == 0;
2614 }
2615 
2616 /**
2617  * Open an image file chain and return an ImageInfoList
2618  *
2619  * @filename: topmost image filename
2620  * @fmt: topmost image format (may be NULL to autodetect)
2621  * @chain: true  - enumerate entire backing file chain
2622  *         false - only topmost image file
2623  *
2624  * Returns a list of ImageInfo objects or NULL if there was an error opening an
2625  * image file.  If there was an error a message will have been printed to
2626  * stderr.
2627  */
2628 static ImageInfoList *collect_image_info_list(bool image_opts,
2629                                               const char *filename,
2630                                               const char *fmt,
2631                                               bool chain, bool force_share)
2632 {
2633     ImageInfoList *head = NULL;
2634     ImageInfoList **last = &head;
2635     GHashTable *filenames;
2636     Error *err = NULL;
2637 
2638     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2639 
2640     while (filename) {
2641         BlockBackend *blk;
2642         BlockDriverState *bs;
2643         ImageInfo *info;
2644         ImageInfoList *elem;
2645 
2646         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2647             error_report("Backing file '%s' creates an infinite loop.",
2648                          filename);
2649             goto err;
2650         }
2651         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2652 
2653         blk = img_open(image_opts, filename, fmt,
2654                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
2655                        force_share);
2656         if (!blk) {
2657             goto err;
2658         }
2659         bs = blk_bs(blk);
2660 
2661         bdrv_query_image_info(bs, &info, &err);
2662         if (err) {
2663             error_report_err(err);
2664             blk_unref(blk);
2665             goto err;
2666         }
2667 
2668         elem = g_new0(ImageInfoList, 1);
2669         elem->value = info;
2670         *last = elem;
2671         last = &elem->next;
2672 
2673         blk_unref(blk);
2674 
2675         filename = fmt = NULL;
2676         if (chain) {
2677             if (info->has_full_backing_filename) {
2678                 filename = info->full_backing_filename;
2679             } else if (info->has_backing_filename) {
2680                 error_report("Could not determine absolute backing filename,"
2681                              " but backing filename '%s' present",
2682                              info->backing_filename);
2683                 goto err;
2684             }
2685             if (info->has_backing_filename_format) {
2686                 fmt = info->backing_filename_format;
2687             }
2688         }
2689     }
2690     g_hash_table_destroy(filenames);
2691     return head;
2692 
2693 err:
2694     qapi_free_ImageInfoList(head);
2695     g_hash_table_destroy(filenames);
2696     return NULL;
2697 }
2698 
2699 static int img_info(int argc, char **argv)
2700 {
2701     int c;
2702     OutputFormat output_format = OFORMAT_HUMAN;
2703     bool chain = false;
2704     const char *filename, *fmt, *output;
2705     ImageInfoList *list;
2706     bool image_opts = false;
2707     bool force_share = false;
2708 
2709     fmt = NULL;
2710     output = NULL;
2711     for(;;) {
2712         int option_index = 0;
2713         static const struct option long_options[] = {
2714             {"help", no_argument, 0, 'h'},
2715             {"format", required_argument, 0, 'f'},
2716             {"output", required_argument, 0, OPTION_OUTPUT},
2717             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
2718             {"object", required_argument, 0, OPTION_OBJECT},
2719             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2720             {"force-share", no_argument, 0, 'U'},
2721             {0, 0, 0, 0}
2722         };
2723         c = getopt_long(argc, argv, ":f:hU",
2724                         long_options, &option_index);
2725         if (c == -1) {
2726             break;
2727         }
2728         switch(c) {
2729         case ':':
2730             missing_argument(argv[optind - 1]);
2731             break;
2732         case '?':
2733             unrecognized_option(argv[optind - 1]);
2734             break;
2735         case 'h':
2736             help();
2737             break;
2738         case 'f':
2739             fmt = optarg;
2740             break;
2741         case 'U':
2742             force_share = true;
2743             break;
2744         case OPTION_OUTPUT:
2745             output = optarg;
2746             break;
2747         case OPTION_BACKING_CHAIN:
2748             chain = true;
2749             break;
2750         case OPTION_OBJECT: {
2751             QemuOpts *opts;
2752             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2753                                            optarg, true);
2754             if (!opts) {
2755                 return 1;
2756             }
2757         }   break;
2758         case OPTION_IMAGE_OPTS:
2759             image_opts = true;
2760             break;
2761         }
2762     }
2763     if (optind != argc - 1) {
2764         error_exit("Expecting one image file name");
2765     }
2766     filename = argv[optind++];
2767 
2768     if (output && !strcmp(output, "json")) {
2769         output_format = OFORMAT_JSON;
2770     } else if (output && !strcmp(output, "human")) {
2771         output_format = OFORMAT_HUMAN;
2772     } else if (output) {
2773         error_report("--output must be used with human or json as argument.");
2774         return 1;
2775     }
2776 
2777     if (qemu_opts_foreach(&qemu_object_opts,
2778                           user_creatable_add_opts_foreach,
2779                           NULL, &error_fatal)) {
2780         return 1;
2781     }
2782 
2783     list = collect_image_info_list(image_opts, filename, fmt, chain,
2784                                    force_share);
2785     if (!list) {
2786         return 1;
2787     }
2788 
2789     switch (output_format) {
2790     case OFORMAT_HUMAN:
2791         dump_human_image_info_list(list);
2792         break;
2793     case OFORMAT_JSON:
2794         if (chain) {
2795             dump_json_image_info_list(list);
2796         } else {
2797             dump_json_image_info(list->value);
2798         }
2799         break;
2800     }
2801 
2802     qapi_free_ImageInfoList(list);
2803     return 0;
2804 }
2805 
2806 static int dump_map_entry(OutputFormat output_format, MapEntry *e,
2807                           MapEntry *next)
2808 {
2809     switch (output_format) {
2810     case OFORMAT_HUMAN:
2811         if (e->data && !e->has_offset) {
2812             error_report("File contains external, encrypted or compressed clusters.");
2813             return -1;
2814         }
2815         if (e->data && !e->zero) {
2816             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
2817                    e->start, e->length,
2818                    e->has_offset ? e->offset : 0,
2819                    e->has_filename ? e->filename : "");
2820         }
2821         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
2822          * Modify the flags here to allow more coalescing.
2823          */
2824         if (next && (!next->data || next->zero)) {
2825             next->data = false;
2826             next->zero = true;
2827         }
2828         break;
2829     case OFORMAT_JSON:
2830         printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
2831                " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
2832                (e->start == 0 ? "[" : ",\n"),
2833                e->start, e->length, e->depth,
2834                e->zero ? "true" : "false",
2835                e->data ? "true" : "false");
2836         if (e->has_offset) {
2837             printf(", \"offset\": %"PRId64"", e->offset);
2838         }
2839         putchar('}');
2840 
2841         if (!next) {
2842             printf("]\n");
2843         }
2844         break;
2845     }
2846     return 0;
2847 }
2848 
2849 static int get_block_status(BlockDriverState *bs, int64_t offset,
2850                             int64_t bytes, MapEntry *e)
2851 {
2852     int ret;
2853     int depth;
2854     BlockDriverState *file;
2855     bool has_offset;
2856     int64_t map;
2857     char *filename = NULL;
2858 
2859     /* As an optimization, we could cache the current range of unallocated
2860      * clusters in each file of the chain, and avoid querying the same
2861      * range repeatedly.
2862      */
2863 
2864     depth = 0;
2865     for (;;) {
2866         ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
2867         if (ret < 0) {
2868             return ret;
2869         }
2870         assert(bytes);
2871         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
2872             break;
2873         }
2874         bs = backing_bs(bs);
2875         if (bs == NULL) {
2876             ret = 0;
2877             break;
2878         }
2879 
2880         depth++;
2881     }
2882 
2883     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
2884 
2885     if (file && has_offset) {
2886         bdrv_refresh_filename(file);
2887         filename = file->filename;
2888     }
2889 
2890     *e = (MapEntry) {
2891         .start = offset,
2892         .length = bytes,
2893         .data = !!(ret & BDRV_BLOCK_DATA),
2894         .zero = !!(ret & BDRV_BLOCK_ZERO),
2895         .offset = map,
2896         .has_offset = has_offset,
2897         .depth = depth,
2898         .has_filename = filename,
2899         .filename = filename,
2900     };
2901 
2902     return 0;
2903 }
2904 
2905 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
2906 {
2907     if (curr->length == 0) {
2908         return false;
2909     }
2910     if (curr->zero != next->zero ||
2911         curr->data != next->data ||
2912         curr->depth != next->depth ||
2913         curr->has_filename != next->has_filename ||
2914         curr->has_offset != next->has_offset) {
2915         return false;
2916     }
2917     if (curr->has_filename && strcmp(curr->filename, next->filename)) {
2918         return false;
2919     }
2920     if (curr->has_offset && curr->offset + curr->length != next->offset) {
2921         return false;
2922     }
2923     return true;
2924 }
2925 
2926 static int img_map(int argc, char **argv)
2927 {
2928     int c;
2929     OutputFormat output_format = OFORMAT_HUMAN;
2930     BlockBackend *blk;
2931     BlockDriverState *bs;
2932     const char *filename, *fmt, *output;
2933     int64_t length;
2934     MapEntry curr = { .length = 0 }, next;
2935     int ret = 0;
2936     bool image_opts = false;
2937     bool force_share = false;
2938 
2939     fmt = NULL;
2940     output = NULL;
2941     for (;;) {
2942         int option_index = 0;
2943         static const struct option long_options[] = {
2944             {"help", no_argument, 0, 'h'},
2945             {"format", required_argument, 0, 'f'},
2946             {"output", required_argument, 0, OPTION_OUTPUT},
2947             {"object", required_argument, 0, OPTION_OBJECT},
2948             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2949             {"force-share", no_argument, 0, 'U'},
2950             {0, 0, 0, 0}
2951         };
2952         c = getopt_long(argc, argv, ":f:hU",
2953                         long_options, &option_index);
2954         if (c == -1) {
2955             break;
2956         }
2957         switch (c) {
2958         case ':':
2959             missing_argument(argv[optind - 1]);
2960             break;
2961         case '?':
2962             unrecognized_option(argv[optind - 1]);
2963             break;
2964         case 'h':
2965             help();
2966             break;
2967         case 'f':
2968             fmt = optarg;
2969             break;
2970         case 'U':
2971             force_share = true;
2972             break;
2973         case OPTION_OUTPUT:
2974             output = optarg;
2975             break;
2976         case OPTION_OBJECT: {
2977             QemuOpts *opts;
2978             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2979                                            optarg, true);
2980             if (!opts) {
2981                 return 1;
2982             }
2983         }   break;
2984         case OPTION_IMAGE_OPTS:
2985             image_opts = true;
2986             break;
2987         }
2988     }
2989     if (optind != argc - 1) {
2990         error_exit("Expecting one image file name");
2991     }
2992     filename = argv[optind];
2993 
2994     if (output && !strcmp(output, "json")) {
2995         output_format = OFORMAT_JSON;
2996     } else if (output && !strcmp(output, "human")) {
2997         output_format = OFORMAT_HUMAN;
2998     } else if (output) {
2999         error_report("--output must be used with human or json as argument.");
3000         return 1;
3001     }
3002 
3003     if (qemu_opts_foreach(&qemu_object_opts,
3004                           user_creatable_add_opts_foreach,
3005                           NULL, &error_fatal)) {
3006         return 1;
3007     }
3008 
3009     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3010     if (!blk) {
3011         return 1;
3012     }
3013     bs = blk_bs(blk);
3014 
3015     if (output_format == OFORMAT_HUMAN) {
3016         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3017     }
3018 
3019     length = blk_getlength(blk);
3020     while (curr.start + curr.length < length) {
3021         int64_t offset = curr.start + curr.length;
3022         int64_t n;
3023 
3024         /* Probe up to 1 GiB at a time.  */
3025         n = MIN(1 * GiB, length - offset);
3026         ret = get_block_status(bs, offset, n, &next);
3027 
3028         if (ret < 0) {
3029             error_report("Could not read file metadata: %s", strerror(-ret));
3030             goto out;
3031         }
3032 
3033         if (entry_mergeable(&curr, &next)) {
3034             curr.length += next.length;
3035             continue;
3036         }
3037 
3038         if (curr.length > 0) {
3039             ret = dump_map_entry(output_format, &curr, &next);
3040             if (ret < 0) {
3041                 goto out;
3042             }
3043         }
3044         curr = next;
3045     }
3046 
3047     ret = dump_map_entry(output_format, &curr, NULL);
3048 
3049 out:
3050     blk_unref(blk);
3051     return ret < 0;
3052 }
3053 
/* Actions selectable with the -l, -c, -a and -d options of img_snapshot(). */
enum {
    SNAPSHOT_LIST   = 1,
    SNAPSHOT_CREATE = 2,
    SNAPSHOT_APPLY  = 3,
    SNAPSHOT_DELETE = 4,
};
3058 
3059 static int img_snapshot(int argc, char **argv)
3060 {
3061     BlockBackend *blk;
3062     BlockDriverState *bs;
3063     QEMUSnapshotInfo sn;
3064     char *filename, *snapshot_name = NULL;
3065     int c, ret = 0, bdrv_oflags;
3066     int action = 0;
3067     qemu_timeval tv;
3068     bool quiet = false;
3069     Error *err = NULL;
3070     bool image_opts = false;
3071     bool force_share = false;
3072 
3073     bdrv_oflags = BDRV_O_RDWR;
3074     /* Parse commandline parameters */
3075     for(;;) {
3076         static const struct option long_options[] = {
3077             {"help", no_argument, 0, 'h'},
3078             {"object", required_argument, 0, OPTION_OBJECT},
3079             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3080             {"force-share", no_argument, 0, 'U'},
3081             {0, 0, 0, 0}
3082         };
3083         c = getopt_long(argc, argv, ":la:c:d:hqU",
3084                         long_options, NULL);
3085         if (c == -1) {
3086             break;
3087         }
3088         switch(c) {
3089         case ':':
3090             missing_argument(argv[optind - 1]);
3091             break;
3092         case '?':
3093             unrecognized_option(argv[optind - 1]);
3094             break;
3095         case 'h':
3096             help();
3097             return 0;
3098         case 'l':
3099             if (action) {
3100                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3101                 return 0;
3102             }
3103             action = SNAPSHOT_LIST;
3104             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
3105             break;
3106         case 'a':
3107             if (action) {
3108                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3109                 return 0;
3110             }
3111             action = SNAPSHOT_APPLY;
3112             snapshot_name = optarg;
3113             break;
3114         case 'c':
3115             if (action) {
3116                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3117                 return 0;
3118             }
3119             action = SNAPSHOT_CREATE;
3120             snapshot_name = optarg;
3121             break;
3122         case 'd':
3123             if (action) {
3124                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3125                 return 0;
3126             }
3127             action = SNAPSHOT_DELETE;
3128             snapshot_name = optarg;
3129             break;
3130         case 'q':
3131             quiet = true;
3132             break;
3133         case 'U':
3134             force_share = true;
3135             break;
3136         case OPTION_OBJECT: {
3137             QemuOpts *opts;
3138             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3139                                            optarg, true);
3140             if (!opts) {
3141                 return 1;
3142             }
3143         }   break;
3144         case OPTION_IMAGE_OPTS:
3145             image_opts = true;
3146             break;
3147         }
3148     }
3149 
3150     if (optind != argc - 1) {
3151         error_exit("Expecting one image file name");
3152     }
3153     filename = argv[optind++];
3154 
3155     if (qemu_opts_foreach(&qemu_object_opts,
3156                           user_creatable_add_opts_foreach,
3157                           NULL, &error_fatal)) {
3158         return 1;
3159     }
3160 
3161     /* Open the image */
3162     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
3163                    force_share);
3164     if (!blk) {
3165         return 1;
3166     }
3167     bs = blk_bs(blk);
3168 
3169     /* Perform the requested action */
3170     switch(action) {
3171     case SNAPSHOT_LIST:
3172         dump_snapshots(bs);
3173         break;
3174 
3175     case SNAPSHOT_CREATE:
3176         memset(&sn, 0, sizeof(sn));
3177         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3178 
3179         qemu_gettimeofday(&tv);
3180         sn.date_sec = tv.tv_sec;
3181         sn.date_nsec = tv.tv_usec * 1000;
3182 
3183         ret = bdrv_snapshot_create(bs, &sn);
3184         if (ret) {
3185             error_report("Could not create snapshot '%s': %d (%s)",
3186                 snapshot_name, ret, strerror(-ret));
3187         }
3188         break;
3189 
3190     case SNAPSHOT_APPLY:
3191         ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3192         if (ret) {
3193             error_reportf_err(err, "Could not apply snapshot '%s': ",
3194                               snapshot_name);
3195         }
3196         break;
3197 
3198     case SNAPSHOT_DELETE:
3199         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3200         if (ret < 0) {
3201             error_report("Could not delete snapshot '%s': snapshot not "
3202                          "found", snapshot_name);
3203             ret = 1;
3204         } else {
3205             ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3206             if (ret < 0) {
3207                 error_reportf_err(err, "Could not delete snapshot '%s': ",
3208                                   snapshot_name);
3209                 ret = 1;
3210             }
3211         }
3212         break;
3213     }
3214 
3215     /* Cleanup */
3216     blk_unref(blk);
3217     if (ret) {
3218         return 1;
3219     }
3220     return 0;
3221 }
3222 
3223 static int img_rebase(int argc, char **argv)
3224 {
3225     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3226     uint8_t *buf_old = NULL;
3227     uint8_t *buf_new = NULL;
3228     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3229     char *filename;
3230     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3231     int c, flags, src_flags, ret;
3232     bool writethrough, src_writethrough;
3233     int unsafe = 0;
3234     bool force_share = false;
3235     int progress = 0;
3236     bool quiet = false;
3237     Error *local_err = NULL;
3238     bool image_opts = false;
3239 
3240     /* Parse commandline parameters */
3241     fmt = NULL;
3242     cache = BDRV_DEFAULT_CACHE;
3243     src_cache = BDRV_DEFAULT_CACHE;
3244     out_baseimg = NULL;
3245     out_basefmt = NULL;
3246     for(;;) {
3247         static const struct option long_options[] = {
3248             {"help", no_argument, 0, 'h'},
3249             {"object", required_argument, 0, OPTION_OBJECT},
3250             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3251             {"force-share", no_argument, 0, 'U'},
3252             {0, 0, 0, 0}
3253         };
3254         c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
3255                         long_options, NULL);
3256         if (c == -1) {
3257             break;
3258         }
3259         switch(c) {
3260         case ':':
3261             missing_argument(argv[optind - 1]);
3262             break;
3263         case '?':
3264             unrecognized_option(argv[optind - 1]);
3265             break;
3266         case 'h':
3267             help();
3268             return 0;
3269         case 'f':
3270             fmt = optarg;
3271             break;
3272         case 'F':
3273             out_basefmt = optarg;
3274             break;
3275         case 'b':
3276             out_baseimg = optarg;
3277             break;
3278         case 'u':
3279             unsafe = 1;
3280             break;
3281         case 'p':
3282             progress = 1;
3283             break;
3284         case 't':
3285             cache = optarg;
3286             break;
3287         case 'T':
3288             src_cache = optarg;
3289             break;
3290         case 'q':
3291             quiet = true;
3292             break;
3293         case OPTION_OBJECT: {
3294             QemuOpts *opts;
3295             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3296                                            optarg, true);
3297             if (!opts) {
3298                 return 1;
3299             }
3300         }   break;
3301         case OPTION_IMAGE_OPTS:
3302             image_opts = true;
3303             break;
3304         case 'U':
3305             force_share = true;
3306             break;
3307         }
3308     }
3309 
3310     if (quiet) {
3311         progress = 0;
3312     }
3313 
3314     if (optind != argc - 1) {
3315         error_exit("Expecting one image file name");
3316     }
3317     if (!unsafe && !out_baseimg) {
3318         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3319     }
3320     filename = argv[optind++];
3321 
3322     if (qemu_opts_foreach(&qemu_object_opts,
3323                           user_creatable_add_opts_foreach,
3324                           NULL, &error_fatal)) {
3325         return 1;
3326     }
3327 
3328     qemu_progress_init(progress, 2.0);
3329     qemu_progress_print(0, 100);
3330 
3331     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3332     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3333     if (ret < 0) {
3334         error_report("Invalid cache option: %s", cache);
3335         goto out;
3336     }
3337 
3338     src_flags = 0;
3339     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3340     if (ret < 0) {
3341         error_report("Invalid source cache option: %s", src_cache);
3342         goto out;
3343     }
3344 
3345     /* The source files are opened read-only, don't care about WCE */
3346     assert((src_flags & BDRV_O_RDWR) == 0);
3347     (void) src_writethrough;
3348 
3349     /*
3350      * Open the images.
3351      *
3352      * Ignore the old backing file for unsafe rebase in case we want to correct
3353      * the reference to a renamed or moved backing file.
3354      */
3355     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3356                    false);
3357     if (!blk) {
3358         ret = -1;
3359         goto out;
3360     }
3361     bs = blk_bs(blk);
3362 
3363     if (out_basefmt != NULL) {
3364         if (bdrv_find_format(out_basefmt) == NULL) {
3365             error_report("Invalid format name: '%s'", out_basefmt);
3366             ret = -1;
3367             goto out;
3368         }
3369     }
3370 
3371     /* For safe rebasing we need to compare old and new backing file */
3372     if (!unsafe) {
3373         QDict *options = NULL;
3374         BlockDriverState *base_bs = backing_bs(bs);
3375 
3376         if (base_bs) {
3377             blk_old_backing = blk_new(qemu_get_aio_context(),
3378                                       BLK_PERM_CONSISTENT_READ,
3379                                       BLK_PERM_ALL);
3380             ret = blk_insert_bs(blk_old_backing, base_bs,
3381                                 &local_err);
3382             if (ret < 0) {
3383                 error_reportf_err(local_err,
3384                                   "Could not reuse old backing file '%s': ",
3385                                   base_bs->filename);
3386                 goto out;
3387             }
3388         } else {
3389             blk_old_backing = NULL;
3390         }
3391 
3392         if (out_baseimg[0]) {
3393             const char *overlay_filename;
3394             char *out_real_path;
3395 
3396             options = qdict_new();
3397             if (out_basefmt) {
3398                 qdict_put_str(options, "driver", out_basefmt);
3399             }
3400             if (force_share) {
3401                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3402             }
3403 
3404             bdrv_refresh_filename(bs);
3405             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3406                                                      : bs->filename;
3407             out_real_path =
3408                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
3409                                                              out_baseimg,
3410                                                              &local_err);
3411             if (local_err) {
3412                 qobject_unref(options);
3413                 error_reportf_err(local_err,
3414                                   "Could not resolve backing filename: ");
3415                 ret = -1;
3416                 goto out;
3417             }
3418 
3419             /*
3420              * Find out whether we rebase an image on top of a previous image
3421              * in its chain.
3422              */
3423             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3424             if (prefix_chain_bs) {
3425                 qobject_unref(options);
3426                 g_free(out_real_path);
3427 
3428                 blk_new_backing = blk_new(qemu_get_aio_context(),
3429                                           BLK_PERM_CONSISTENT_READ,
3430                                           BLK_PERM_ALL);
3431                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3432                                     &local_err);
3433                 if (ret < 0) {
3434                     error_reportf_err(local_err,
3435                                       "Could not reuse backing file '%s': ",
3436                                       out_baseimg);
3437                     goto out;
3438                 }
3439             } else {
3440                 blk_new_backing = blk_new_open(out_real_path, NULL,
3441                                                options, src_flags, &local_err);
3442                 g_free(out_real_path);
3443                 if (!blk_new_backing) {
3444                     error_reportf_err(local_err,
3445                                       "Could not open new backing file '%s': ",
3446                                       out_baseimg);
3447                     ret = -1;
3448                     goto out;
3449                 }
3450             }
3451         }
3452     }
3453 
3454     /*
3455      * Check each unallocated cluster in the COW file. If it is unallocated,
3456      * accesses go to the backing file. We must therefore compare this cluster
3457      * in the old and new backing file, and if they differ we need to copy it
3458      * from the old backing file into the COW file.
3459      *
3460      * If qemu-img crashes during this step, no harm is done. The content of
3461      * the image is the same as the original one at any time.
3462      */
3463     if (!unsafe) {
3464         int64_t size;
3465         int64_t old_backing_size = 0;
3466         int64_t new_backing_size = 0;
3467         uint64_t offset;
3468         int64_t n;
3469         float local_progress = 0;
3470 
3471         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3472         buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3473 
3474         size = blk_getlength(blk);
3475         if (size < 0) {
3476             error_report("Could not get size of '%s': %s",
3477                          filename, strerror(-size));
3478             ret = -1;
3479             goto out;
3480         }
3481         if (blk_old_backing) {
3482             old_backing_size = blk_getlength(blk_old_backing);
3483             if (old_backing_size < 0) {
3484                 char backing_name[PATH_MAX];
3485 
3486                 bdrv_get_backing_filename(bs, backing_name,
3487                                           sizeof(backing_name));
3488                 error_report("Could not get size of '%s': %s",
3489                              backing_name, strerror(-old_backing_size));
3490                 ret = -1;
3491                 goto out;
3492             }
3493         }
3494         if (blk_new_backing) {
3495             new_backing_size = blk_getlength(blk_new_backing);
3496             if (new_backing_size < 0) {
3497                 error_report("Could not get size of '%s': %s",
3498                              out_baseimg, strerror(-new_backing_size));
3499                 ret = -1;
3500                 goto out;
3501             }
3502         }
3503 
3504         if (size != 0) {
3505             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3506         }
3507 
3508         for (offset = 0; offset < size; offset += n) {
3509             bool buf_old_is_zero = false;
3510 
3511             /* How many bytes can we handle with the next read? */
3512             n = MIN(IO_BUF_SIZE, size - offset);
3513 
3514             /* If the cluster is allocated, we don't need to take action */
3515             ret = bdrv_is_allocated(bs, offset, n, &n);
3516             if (ret < 0) {
3517                 error_report("error while reading image metadata: %s",
3518                              strerror(-ret));
3519                 goto out;
3520             }
3521             if (ret) {
3522                 continue;
3523             }
3524 
3525             if (prefix_chain_bs) {
3526                 /*
3527                  * If cluster wasn't changed since prefix_chain, we don't need
3528                  * to take action
3529                  */
3530                 ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
3531                                               false, offset, n, &n);
3532                 if (ret < 0) {
3533                     error_report("error while reading image metadata: %s",
3534                                  strerror(-ret));
3535                     goto out;
3536                 }
3537                 if (!ret) {
3538                     continue;
3539                 }
3540             }
3541 
3542             /*
3543              * Read old and new backing file and take into consideration that
3544              * backing files may be smaller than the COW image.
3545              */
3546             if (offset >= old_backing_size) {
3547                 memset(buf_old, 0, n);
3548                 buf_old_is_zero = true;
3549             } else {
3550                 if (offset + n > old_backing_size) {
3551                     n = old_backing_size - offset;
3552                 }
3553 
3554                 ret = blk_pread(blk_old_backing, offset, buf_old, n);
3555                 if (ret < 0) {
3556                     error_report("error while reading from old backing file");
3557                     goto out;
3558                 }
3559             }
3560 
3561             if (offset >= new_backing_size || !blk_new_backing) {
3562                 memset(buf_new, 0, n);
3563             } else {
3564                 if (offset + n > new_backing_size) {
3565                     n = new_backing_size - offset;
3566                 }
3567 
3568                 ret = blk_pread(blk_new_backing, offset, buf_new, n);
3569                 if (ret < 0) {
3570                     error_report("error while reading from new backing file");
3571                     goto out;
3572                 }
3573             }
3574 
3575             /* If they differ, we need to write to the COW file */
3576             uint64_t written = 0;
3577 
3578             while (written < n) {
3579                 int64_t pnum;
3580 
3581                 if (compare_buffers(buf_old + written, buf_new + written,
3582                                     n - written, &pnum))
3583                 {
3584                     if (buf_old_is_zero) {
3585                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
3586                     } else {
3587                         ret = blk_pwrite(blk, offset + written,
3588                                          buf_old + written, pnum, 0);
3589                     }
3590                     if (ret < 0) {
3591                         error_report("Error while writing to COW image: %s",
3592                             strerror(-ret));
3593                         goto out;
3594                     }
3595                 }
3596 
3597                 written += pnum;
3598             }
3599             qemu_progress_print(local_progress, 100);
3600         }
3601     }
3602 
3603     /*
3604      * Change the backing file. All clusters that are different from the old
3605      * backing file are overwritten in the COW file now, so the visible content
3606      * doesn't change when we switch the backing file.
3607      */
3608     if (out_baseimg && *out_baseimg) {
3609         ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
3610     } else {
3611         ret = bdrv_change_backing_file(bs, NULL, NULL);
3612     }
3613 
3614     if (ret == -ENOSPC) {
3615         error_report("Could not change the backing file to '%s': No "
3616                      "space left in the file header", out_baseimg);
3617     } else if (ret < 0) {
3618         error_report("Could not change the backing file to '%s': %s",
3619             out_baseimg, strerror(-ret));
3620     }
3621 
3622     qemu_progress_print(100, 0);
3623     /*
3624      * TODO At this point it is possible to check if any clusters that are
3625      * allocated in the COW file are the same in the backing file. If so, they
3626      * could be dropped from the COW file. Don't do this before switching the
3627      * backing file, in case of a crash this would lead to corruption.
3628      */
3629 out:
3630     qemu_progress_end();
3631     /* Cleanup */
3632     if (!unsafe) {
3633         blk_unref(blk_old_backing);
3634         blk_unref(blk_new_backing);
3635     }
3636     qemu_vfree(buf_old);
3637     qemu_vfree(buf_new);
3638 
3639     blk_unref(blk);
3640     if (ret) {
3641         return 1;
3642     }
3643     return 0;
3644 }
3645 
3646 static int img_resize(int argc, char **argv)
3647 {
3648     Error *err = NULL;
3649     int c, ret, relative;
3650     const char *filename, *fmt, *size;
3651     int64_t n, total_size, current_size, new_size;
3652     bool quiet = false;
3653     BlockBackend *blk = NULL;
3654     PreallocMode prealloc = PREALLOC_MODE_OFF;
3655     QemuOpts *param;
3656 
3657     static QemuOptsList resize_options = {
3658         .name = "resize_options",
3659         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3660         .desc = {
3661             {
3662                 .name = BLOCK_OPT_SIZE,
3663                 .type = QEMU_OPT_SIZE,
3664                 .help = "Virtual disk size"
3665             }, {
3666                 /* end of list */
3667             }
3668         },
3669     };
3670     bool image_opts = false;
3671     bool shrink = false;
3672 
3673     /* Remove size from argv manually so that negative numbers are not treated
3674      * as options by getopt. */
3675     if (argc < 3) {
3676         error_exit("Not enough arguments");
3677         return 1;
3678     }
3679 
3680     size = argv[--argc];
3681 
3682     /* Parse getopt arguments */
3683     fmt = NULL;
3684     for(;;) {
3685         static const struct option long_options[] = {
3686             {"help", no_argument, 0, 'h'},
3687             {"object", required_argument, 0, OPTION_OBJECT},
3688             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3689             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
3690             {"shrink", no_argument, 0, OPTION_SHRINK},
3691             {0, 0, 0, 0}
3692         };
3693         c = getopt_long(argc, argv, ":f:hq",
3694                         long_options, NULL);
3695         if (c == -1) {
3696             break;
3697         }
3698         switch(c) {
3699         case ':':
3700             missing_argument(argv[optind - 1]);
3701             break;
3702         case '?':
3703             unrecognized_option(argv[optind - 1]);
3704             break;
3705         case 'h':
3706             help();
3707             break;
3708         case 'f':
3709             fmt = optarg;
3710             break;
3711         case 'q':
3712             quiet = true;
3713             break;
3714         case OPTION_OBJECT: {
3715             QemuOpts *opts;
3716             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3717                                            optarg, true);
3718             if (!opts) {
3719                 return 1;
3720             }
3721         }   break;
3722         case OPTION_IMAGE_OPTS:
3723             image_opts = true;
3724             break;
3725         case OPTION_PREALLOCATION:
3726             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
3727                                        PREALLOC_MODE__MAX, NULL);
3728             if (prealloc == PREALLOC_MODE__MAX) {
3729                 error_report("Invalid preallocation mode '%s'", optarg);
3730                 return 1;
3731             }
3732             break;
3733         case OPTION_SHRINK:
3734             shrink = true;
3735             break;
3736         }
3737     }
3738     if (optind != argc - 1) {
3739         error_exit("Expecting image file name and size");
3740     }
3741     filename = argv[optind++];
3742 
3743     if (qemu_opts_foreach(&qemu_object_opts,
3744                           user_creatable_add_opts_foreach,
3745                           NULL, &error_fatal)) {
3746         return 1;
3747     }
3748 
3749     /* Choose grow, shrink, or absolute resize mode */
3750     switch (size[0]) {
3751     case '+':
3752         relative = 1;
3753         size++;
3754         break;
3755     case '-':
3756         relative = -1;
3757         size++;
3758         break;
3759     default:
3760         relative = 0;
3761         break;
3762     }
3763 
3764     /* Parse size */
3765     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
3766     qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
3767     if (err) {
3768         error_report_err(err);
3769         ret = -1;
3770         qemu_opts_del(param);
3771         goto out;
3772     }
3773     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
3774     qemu_opts_del(param);
3775 
3776     blk = img_open(image_opts, filename, fmt,
3777                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
3778                    false);
3779     if (!blk) {
3780         ret = -1;
3781         goto out;
3782     }
3783 
3784     current_size = blk_getlength(blk);
3785     if (current_size < 0) {
3786         error_report("Failed to inquire current image length: %s",
3787                      strerror(-current_size));
3788         ret = -1;
3789         goto out;
3790     }
3791 
3792     if (relative) {
3793         total_size = current_size + n * relative;
3794     } else {
3795         total_size = n;
3796     }
3797     if (total_size <= 0) {
3798         error_report("New image size must be positive");
3799         ret = -1;
3800         goto out;
3801     }
3802 
3803     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
3804         error_report("Preallocation can only be used for growing images");
3805         ret = -1;
3806         goto out;
3807     }
3808 
3809     if (total_size < current_size && !shrink) {
3810         warn_report("Shrinking an image will delete all data beyond the "
3811                     "shrunken image's end. Before performing such an "
3812                     "operation, make sure there is no important data there.");
3813 
3814         if (g_strcmp0(bdrv_get_format_name(blk_bs(blk)), "raw") != 0) {
3815             error_report(
3816               "Use the --shrink option to perform a shrink operation.");
3817             ret = -1;
3818             goto out;
3819         } else {
3820             warn_report("Using the --shrink option will suppress this message. "
3821                         "Note that future versions of qemu-img may refuse to "
3822                         "shrink images without this option.");
3823         }
3824     }
3825 
3826     ret = blk_truncate(blk, total_size, prealloc, &err);
3827     if (ret < 0) {
3828         error_report_err(err);
3829         goto out;
3830     }
3831 
3832     new_size = blk_getlength(blk);
3833     if (new_size < 0) {
3834         error_report("Failed to verify truncated image length: %s",
3835                      strerror(-new_size));
3836         ret = -1;
3837         goto out;
3838     }
3839 
3840     /* Some block drivers implement a truncation method, but only so
3841      * the user can cause qemu to refresh the image's size from disk.
3842      * The idea is that the user resizes the image outside of qemu and
3843      * then invokes block_resize to inform qemu about it.
3844      * (This includes iscsi and file-posix for device files.)
3845      * Of course, that is not the behavior someone invoking
3846      * qemu-img resize would find useful, so we catch that behavior
3847      * here and tell the user. */
3848     if (new_size != total_size && new_size == current_size) {
3849         error_report("Image was not resized; resizing may not be supported "
3850                      "for this image");
3851         ret = -1;
3852         goto out;
3853     }
3854 
3855     if (new_size != total_size) {
3856         warn_report("Image should have been resized to %" PRIi64
3857                     " bytes, but was resized to %" PRIi64 " bytes",
3858                     total_size, new_size);
3859     }
3860 
3861     qprintf(quiet, "Image resized.\n");
3862 
3863 out:
3864     blk_unref(blk);
3865     if (ret) {
3866         return 1;
3867     }
3868     return 0;
3869 }
3870 
3871 static void amend_status_cb(BlockDriverState *bs,
3872                             int64_t offset, int64_t total_work_size,
3873                             void *opaque)
3874 {
3875     qemu_progress_print(100.f * offset / total_work_size, 0);
3876 }
3877 
3878 static int print_amend_option_help(const char *format)
3879 {
3880     BlockDriver *drv;
3881 
3882     /* Find driver and parse its options */
3883     drv = bdrv_find_format(format);
3884     if (!drv) {
3885         error_report("Unknown file format '%s'", format);
3886         return 1;
3887     }
3888 
3889     if (!drv->bdrv_amend_options) {
3890         error_report("Format driver '%s' does not support option amendment",
3891                      format);
3892         return 1;
3893     }
3894 
3895     /* Every driver supporting amendment must have create_opts */
3896     assert(drv->create_opts);
3897 
3898     printf("Creation options for '%s':\n", format);
3899     qemu_opts_print_help(drv->create_opts, false);
3900     printf("\nNote that not all of these options may be amendable.\n");
3901     return 0;
3902 }
3903 
3904 static int img_amend(int argc, char **argv)
3905 {
3906     Error *err = NULL;
3907     int c, ret = 0;
3908     char *options = NULL;
3909     QemuOptsList *create_opts = NULL;
3910     QemuOpts *opts = NULL;
3911     const char *fmt = NULL, *filename, *cache;
3912     int flags;
3913     bool writethrough;
3914     bool quiet = false, progress = false;
3915     BlockBackend *blk = NULL;
3916     BlockDriverState *bs = NULL;
3917     bool image_opts = false;
3918 
3919     cache = BDRV_DEFAULT_CACHE;
3920     for (;;) {
3921         static const struct option long_options[] = {
3922             {"help", no_argument, 0, 'h'},
3923             {"object", required_argument, 0, OPTION_OBJECT},
3924             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3925             {0, 0, 0, 0}
3926         };
3927         c = getopt_long(argc, argv, ":ho:f:t:pq",
3928                         long_options, NULL);
3929         if (c == -1) {
3930             break;
3931         }
3932 
3933         switch (c) {
3934         case ':':
3935             missing_argument(argv[optind - 1]);
3936             break;
3937         case '?':
3938             unrecognized_option(argv[optind - 1]);
3939             break;
3940         case 'h':
3941             help();
3942             break;
3943         case 'o':
3944             if (!is_valid_option_list(optarg)) {
3945                 error_report("Invalid option list: %s", optarg);
3946                 ret = -1;
3947                 goto out_no_progress;
3948             }
3949             if (!options) {
3950                 options = g_strdup(optarg);
3951             } else {
3952                 char *old_options = options;
3953                 options = g_strdup_printf("%s,%s", options, optarg);
3954                 g_free(old_options);
3955             }
3956             break;
3957         case 'f':
3958             fmt = optarg;
3959             break;
3960         case 't':
3961             cache = optarg;
3962             break;
3963         case 'p':
3964             progress = true;
3965             break;
3966         case 'q':
3967             quiet = true;
3968             break;
3969         case OPTION_OBJECT:
3970             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3971                                            optarg, true);
3972             if (!opts) {
3973                 ret = -1;
3974                 goto out_no_progress;
3975             }
3976             break;
3977         case OPTION_IMAGE_OPTS:
3978             image_opts = true;
3979             break;
3980         }
3981     }
3982 
3983     if (!options) {
3984         error_exit("Must specify options (-o)");
3985     }
3986 
3987     if (qemu_opts_foreach(&qemu_object_opts,
3988                           user_creatable_add_opts_foreach,
3989                           NULL, &error_fatal)) {
3990         ret = -1;
3991         goto out_no_progress;
3992     }
3993 
3994     if (quiet) {
3995         progress = false;
3996     }
3997     qemu_progress_init(progress, 1.0);
3998 
3999     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4000     if (fmt && has_help_option(options)) {
4001         /* If a format is explicitly specified (and possibly no filename is
4002          * given), print option help here */
4003         ret = print_amend_option_help(fmt);
4004         goto out;
4005     }
4006 
4007     if (optind != argc - 1) {
4008         error_report("Expecting one image file name");
4009         ret = -1;
4010         goto out;
4011     }
4012 
4013     flags = BDRV_O_RDWR;
4014     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4015     if (ret < 0) {
4016         error_report("Invalid cache option: %s", cache);
4017         goto out;
4018     }
4019 
4020     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4021                    false);
4022     if (!blk) {
4023         ret = -1;
4024         goto out;
4025     }
4026     bs = blk_bs(blk);
4027 
4028     fmt = bs->drv->format_name;
4029 
4030     if (has_help_option(options)) {
4031         /* If the format was auto-detected, print option help here */
4032         ret = print_amend_option_help(fmt);
4033         goto out;
4034     }
4035 
4036     if (!bs->drv->bdrv_amend_options) {
4037         error_report("Format driver '%s' does not support option amendment",
4038                      fmt);
4039         ret = -1;
4040         goto out;
4041     }
4042 
4043     /* Every driver supporting amendment must have create_opts */
4044     assert(bs->drv->create_opts);
4045 
4046     create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
4047     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4048     qemu_opts_do_parse(opts, options, NULL, &err);
4049     if (err) {
4050         error_report_err(err);
4051         ret = -1;
4052         goto out;
4053     }
4054 
4055     /* In case the driver does not call amend_status_cb() */
4056     qemu_progress_print(0.f, 0);
4057     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, &err);
4058     qemu_progress_print(100.f, 0);
4059     if (ret < 0) {
4060         error_report_err(err);
4061         goto out;
4062     }
4063 
4064 out:
4065     qemu_progress_end();
4066 
4067 out_no_progress:
4068     blk_unref(blk);
4069     qemu_opts_del(opts);
4070     qemu_opts_free(create_opts);
4071     g_free(options);
4072 
4073     if (ret) {
4074         return 1;
4075     }
4076     return 0;
4077 }
4078 
/* State shared between img_bench() and the bench_cb() completion callback */
typedef struct BenchData {
    BlockBackend *blk;      /* backend the requests are issued to */
    uint64_t image_size;    /* image length; used to wrap the request offset */
    bool write;             /* true: write requests, false: read requests */
    int bufsize;            /* size in bytes of each request */
    int step;               /* added to the offset after each submission */
    int nrreq;              /* upper limit of requests in flight */
    int n;                  /* requests that have not completed yet */
    int flush_interval;     /* flush every this many requests; 0 = never */
    bool drain_on_flush;    /* let in-flight requests finish before a flush */
    uint8_t *buf;           /* data buffer of nrreq * bufsize bytes */
    QEMUIOVector *qiov;     /* nrreq vectors, one bufsize slice of buf each */

    int in_flight;          /* requests submitted but not yet completed */
    bool in_flush;          /* a drained flush is currently pending */
    uint64_t offset;        /* offset of the next request to submit */
} BenchData;
4096 
/*
 * Completion callback for flushes issued without draining the request
 * queue: nothing to do on success, terminate the process on failure.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4104 
4105 static void bench_cb(void *opaque, int ret)
4106 {
4107     BenchData *b = opaque;
4108     BlockAIOCB *acb;
4109 
4110     if (ret < 0) {
4111         error_report("Failed request: %s", strerror(-ret));
4112         exit(EXIT_FAILURE);
4113     }
4114 
4115     if (b->in_flush) {
4116         /* Just finished a flush with drained queue: Start next requests */
4117         assert(b->in_flight == 0);
4118         b->in_flush = false;
4119     } else if (b->in_flight > 0) {
4120         int remaining = b->n - b->in_flight;
4121 
4122         b->n--;
4123         b->in_flight--;
4124 
4125         /* Time for flush? Drain queue if requested, then flush */
4126         if (b->flush_interval && remaining % b->flush_interval == 0) {
4127             if (!b->in_flight || !b->drain_on_flush) {
4128                 BlockCompletionFunc *cb;
4129 
4130                 if (b->drain_on_flush) {
4131                     b->in_flush = true;
4132                     cb = bench_cb;
4133                 } else {
4134                     cb = bench_undrained_flush_cb;
4135                 }
4136 
4137                 acb = blk_aio_flush(b->blk, cb, b);
4138                 if (!acb) {
4139                     error_report("Failed to issue flush request");
4140                     exit(EXIT_FAILURE);
4141                 }
4142             }
4143             if (b->drain_on_flush) {
4144                 return;
4145             }
4146         }
4147     }
4148 
4149     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4150         int64_t offset = b->offset;
4151         /* blk_aio_* might look for completed I/Os and kick bench_cb
4152          * again, so make sure this operation is counted by in_flight
4153          * and b->offset is ready for the next submission.
4154          */
4155         b->in_flight++;
4156         b->offset += b->step;
4157         b->offset %= b->image_size;
4158         if (b->write) {
4159             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4160         } else {
4161             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4162         }
4163         if (!acb) {
4164             error_report("Failed to issue request");
4165             exit(EXIT_FAILURE);
4166         }
4167     }
4168 }
4169 
4170 static int img_bench(int argc, char **argv)
4171 {
4172     int c, ret = 0;
4173     const char *fmt = NULL, *filename;
4174     bool quiet = false;
4175     bool image_opts = false;
4176     bool is_write = false;
4177     int count = 75000;
4178     int depth = 64;
4179     int64_t offset = 0;
4180     size_t bufsize = 4096;
4181     int pattern = 0;
4182     size_t step = 0;
4183     int flush_interval = 0;
4184     bool drain_on_flush = true;
4185     int64_t image_size;
4186     BlockBackend *blk = NULL;
4187     BenchData data = {};
4188     int flags = 0;
4189     bool writethrough = false;
4190     struct timeval t1, t2;
4191     int i;
4192     bool force_share = false;
4193     size_t buf_size;
4194 
4195     for (;;) {
4196         static const struct option long_options[] = {
4197             {"help", no_argument, 0, 'h'},
4198             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4199             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4200             {"pattern", required_argument, 0, OPTION_PATTERN},
4201             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4202             {"force-share", no_argument, 0, 'U'},
4203             {0, 0, 0, 0}
4204         };
4205         c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:wU", long_options, NULL);
4206         if (c == -1) {
4207             break;
4208         }
4209 
4210         switch (c) {
4211         case ':':
4212             missing_argument(argv[optind - 1]);
4213             break;
4214         case '?':
4215             unrecognized_option(argv[optind - 1]);
4216             break;
4217         case 'h':
4218             help();
4219             break;
4220         case 'c':
4221         {
4222             unsigned long res;
4223 
4224             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4225                 error_report("Invalid request count specified");
4226                 return 1;
4227             }
4228             count = res;
4229             break;
4230         }
4231         case 'd':
4232         {
4233             unsigned long res;
4234 
4235             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4236                 error_report("Invalid queue depth specified");
4237                 return 1;
4238             }
4239             depth = res;
4240             break;
4241         }
4242         case 'f':
4243             fmt = optarg;
4244             break;
4245         case 'n':
4246             flags |= BDRV_O_NATIVE_AIO;
4247             break;
4248         case 'o':
4249         {
4250             offset = cvtnum(optarg);
4251             if (offset < 0) {
4252                 error_report("Invalid offset specified");
4253                 return 1;
4254             }
4255             break;
4256         }
4257             break;
4258         case 'q':
4259             quiet = true;
4260             break;
4261         case 's':
4262         {
4263             int64_t sval;
4264 
4265             sval = cvtnum(optarg);
4266             if (sval < 0 || sval > INT_MAX) {
4267                 error_report("Invalid buffer size specified");
4268                 return 1;
4269             }
4270 
4271             bufsize = sval;
4272             break;
4273         }
4274         case 'S':
4275         {
4276             int64_t sval;
4277 
4278             sval = cvtnum(optarg);
4279             if (sval < 0 || sval > INT_MAX) {
4280                 error_report("Invalid step size specified");
4281                 return 1;
4282             }
4283 
4284             step = sval;
4285             break;
4286         }
4287         case 't':
4288             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4289             if (ret < 0) {
4290                 error_report("Invalid cache mode");
4291                 ret = -1;
4292                 goto out;
4293             }
4294             break;
4295         case 'w':
4296             flags |= BDRV_O_RDWR;
4297             is_write = true;
4298             break;
4299         case 'U':
4300             force_share = true;
4301             break;
4302         case OPTION_PATTERN:
4303         {
4304             unsigned long res;
4305 
4306             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4307                 error_report("Invalid pattern byte specified");
4308                 return 1;
4309             }
4310             pattern = res;
4311             break;
4312         }
4313         case OPTION_FLUSH_INTERVAL:
4314         {
4315             unsigned long res;
4316 
4317             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4318                 error_report("Invalid flush interval specified");
4319                 return 1;
4320             }
4321             flush_interval = res;
4322             break;
4323         }
4324         case OPTION_NO_DRAIN:
4325             drain_on_flush = false;
4326             break;
4327         case OPTION_IMAGE_OPTS:
4328             image_opts = true;
4329             break;
4330         }
4331     }
4332 
4333     if (optind != argc - 1) {
4334         error_exit("Expecting one image file name");
4335     }
4336     filename = argv[argc - 1];
4337 
4338     if (!is_write && flush_interval) {
4339         error_report("--flush-interval is only available in write tests");
4340         ret = -1;
4341         goto out;
4342     }
4343     if (flush_interval && flush_interval < depth) {
4344         error_report("Flush interval can't be smaller than depth");
4345         ret = -1;
4346         goto out;
4347     }
4348 
4349     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4350                    force_share);
4351     if (!blk) {
4352         ret = -1;
4353         goto out;
4354     }
4355 
4356     image_size = blk_getlength(blk);
4357     if (image_size < 0) {
4358         ret = image_size;
4359         goto out;
4360     }
4361 
4362     data = (BenchData) {
4363         .blk            = blk,
4364         .image_size     = image_size,
4365         .bufsize        = bufsize,
4366         .step           = step ?: bufsize,
4367         .nrreq          = depth,
4368         .n              = count,
4369         .offset         = offset,
4370         .write          = is_write,
4371         .flush_interval = flush_interval,
4372         .drain_on_flush = drain_on_flush,
4373     };
4374     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4375            "(starting at offset %" PRId64 ", step size %d)\n",
4376            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4377            data.offset, data.step);
4378     if (flush_interval) {
4379         printf("Sending flush every %d requests\n", flush_interval);
4380     }
4381 
4382     buf_size = data.nrreq * data.bufsize;
4383     data.buf = blk_blockalign(blk, buf_size);
4384     memset(data.buf, pattern, data.nrreq * data.bufsize);
4385 
4386     blk_register_buf(blk, data.buf, buf_size);
4387 
4388     data.qiov = g_new(QEMUIOVector, data.nrreq);
4389     for (i = 0; i < data.nrreq; i++) {
4390         qemu_iovec_init(&data.qiov[i], 1);
4391         qemu_iovec_add(&data.qiov[i],
4392                        data.buf + i * data.bufsize, data.bufsize);
4393     }
4394 
4395     gettimeofday(&t1, NULL);
4396     bench_cb(&data, 0);
4397 
4398     while (data.n > 0) {
4399         main_loop_wait(false);
4400     }
4401     gettimeofday(&t2, NULL);
4402 
4403     printf("Run completed in %3.3f seconds.\n",
4404            (t2.tv_sec - t1.tv_sec)
4405            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4406 
4407 out:
4408     if (data.buf) {
4409         blk_unregister_buf(blk, data.buf);
4410     }
4411     qemu_vfree(data.buf);
4412     blk_unref(blk);
4413 
4414     if (ret) {
4415         return 1;
4416     }
4417     return 0;
4418 }
4419 
/* One bit per dd operand; the bits seen are collected in DdInfo.flags */
#define C_BS      (1 << 0)
#define C_COUNT   (1 << 1)
#define C_IF      (1 << 2)
#define C_OF      (1 << 3)
#define C_SKIP    (1 << 4)
4425 
/* Settings of a dd invocation that are not tied to one of the two files */
struct DdInfo {
    unsigned int flags;     /* C_* bits of the operands that were given */
    int64_t count;          /* parsed value of the count= operand */
};
4430 
/* Per-file (input or output) settings of a dd invocation */
struct DdIo {
    int bsz;    /* Block size */
    char *filename;     /* g_strdup()ed copy of the if=/of= operand */
    uint8_t *buf;       /* NOTE(review): presumably the I/O buffer; not used
                         * by the operand parsers */
    int64_t offset;     /* for the input file, set from the skip= operand */
};
4437 
/* Table entry describing one "name=value" operand of dd */
struct DdOpts {
    const char *name;   /* operand name, i.e. the text before '=' */
    /* Parses the value (text after '='); returns 0 on success */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    unsigned int flag;  /* C_* bit recorded once the operand was parsed */
};
4443 
4444 static int img_dd_bs(const char *arg,
4445                      struct DdIo *in, struct DdIo *out,
4446                      struct DdInfo *dd)
4447 {
4448     int64_t res;
4449 
4450     res = cvtnum(arg);
4451 
4452     if (res <= 0 || res > INT_MAX) {
4453         error_report("invalid number: '%s'", arg);
4454         return 1;
4455     }
4456     in->bsz = out->bsz = res;
4457 
4458     return 0;
4459 }
4460 
4461 static int img_dd_count(const char *arg,
4462                         struct DdIo *in, struct DdIo *out,
4463                         struct DdInfo *dd)
4464 {
4465     dd->count = cvtnum(arg);
4466 
4467     if (dd->count < 0) {
4468         error_report("invalid number: '%s'", arg);
4469         return 1;
4470     }
4471 
4472     return 0;
4473 }
4474 
4475 static int img_dd_if(const char *arg,
4476                      struct DdIo *in, struct DdIo *out,
4477                      struct DdInfo *dd)
4478 {
4479     in->filename = g_strdup(arg);
4480 
4481     return 0;
4482 }
4483 
4484 static int img_dd_of(const char *arg,
4485                      struct DdIo *in, struct DdIo *out,
4486                      struct DdInfo *dd)
4487 {
4488     out->filename = g_strdup(arg);
4489 
4490     return 0;
4491 }
4492 
4493 static int img_dd_skip(const char *arg,
4494                        struct DdIo *in, struct DdIo *out,
4495                        struct DdInfo *dd)
4496 {
4497     in->offset = cvtnum(arg);
4498 
4499     if (in->offset < 0) {
4500         error_report("invalid number: '%s'", arg);
4501         return 1;
4502     }
4503 
4504     return 0;
4505 }
4506 
4507 static int img_dd(int argc, char **argv)
4508 {
4509     int ret = 0;
4510     char *arg = NULL;
4511     char *tmp;
4512     BlockDriver *drv = NULL, *proto_drv = NULL;
4513     BlockBackend *blk1 = NULL, *blk2 = NULL;
4514     QemuOpts *opts = NULL;
4515     QemuOptsList *create_opts = NULL;
4516     Error *local_err = NULL;
4517     bool image_opts = false;
4518     int c, i;
4519     const char *out_fmt = "raw";
4520     const char *fmt = NULL;
4521     int64_t size = 0;
4522     int64_t block_count = 0, out_pos, in_pos;
4523     bool force_share = false;
4524     struct DdInfo dd = {
4525         .flags = 0,
4526         .count = 0,
4527     };
4528     struct DdIo in = {
4529         .bsz = 512, /* Block size is by default 512 bytes */
4530         .filename = NULL,
4531         .buf = NULL,
4532         .offset = 0
4533     };
4534     struct DdIo out = {
4535         .bsz = 512,
4536         .filename = NULL,
4537         .buf = NULL,
4538         .offset = 0
4539     };
4540 
4541     const struct DdOpts options[] = {
4542         { "bs", img_dd_bs, C_BS },
4543         { "count", img_dd_count, C_COUNT },
4544         { "if", img_dd_if, C_IF },
4545         { "of", img_dd_of, C_OF },
4546         { "skip", img_dd_skip, C_SKIP },
4547         { NULL, NULL, 0 }
4548     };
4549     const struct option long_options[] = {
4550         { "help", no_argument, 0, 'h'},
4551         { "object", required_argument, 0, OPTION_OBJECT},
4552         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4553         { "force-share", no_argument, 0, 'U'},
4554         { 0, 0, 0, 0 }
4555     };
4556 
4557     while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
4558         if (c == EOF) {
4559             break;
4560         }
4561         switch (c) {
4562         case 'O':
4563             out_fmt = optarg;
4564             break;
4565         case 'f':
4566             fmt = optarg;
4567             break;
4568         case ':':
4569             missing_argument(argv[optind - 1]);
4570             break;
4571         case '?':
4572             unrecognized_option(argv[optind - 1]);
4573             break;
4574         case 'h':
4575             help();
4576             break;
4577         case 'U':
4578             force_share = true;
4579             break;
4580         case OPTION_OBJECT:
4581             if (!qemu_opts_parse_noisily(&qemu_object_opts, optarg, true)) {
4582                 ret = -1;
4583                 goto out;
4584             }
4585             break;
4586         case OPTION_IMAGE_OPTS:
4587             image_opts = true;
4588             break;
4589         }
4590     }
4591 
4592     for (i = optind; i < argc; i++) {
4593         int j;
4594         arg = g_strdup(argv[i]);
4595 
4596         tmp = strchr(arg, '=');
4597         if (tmp == NULL) {
4598             error_report("unrecognized operand %s", arg);
4599             ret = -1;
4600             goto out;
4601         }
4602 
4603         *tmp++ = '\0';
4604 
4605         for (j = 0; options[j].name != NULL; j++) {
4606             if (!strcmp(arg, options[j].name)) {
4607                 break;
4608             }
4609         }
4610         if (options[j].name == NULL) {
4611             error_report("unrecognized operand %s", arg);
4612             ret = -1;
4613             goto out;
4614         }
4615 
4616         if (options[j].f(tmp, &in, &out, &dd) != 0) {
4617             ret = -1;
4618             goto out;
4619         }
4620         dd.flags |= options[j].flag;
4621         g_free(arg);
4622         arg = NULL;
4623     }
4624 
4625     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
4626         error_report("Must specify both input and output files");
4627         ret = -1;
4628         goto out;
4629     }
4630 
4631     if (qemu_opts_foreach(&qemu_object_opts,
4632                           user_creatable_add_opts_foreach,
4633                           NULL, &error_fatal)) {
4634         ret = -1;
4635         goto out;
4636     }
4637 
4638     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
4639                     force_share);
4640 
4641     if (!blk1) {
4642         ret = -1;
4643         goto out;
4644     }
4645 
4646     drv = bdrv_find_format(out_fmt);
4647     if (!drv) {
4648         error_report("Unknown file format");
4649         ret = -1;
4650         goto out;
4651     }
4652     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
4653 
4654     if (!proto_drv) {
4655         error_report_err(local_err);
4656         ret = -1;
4657         goto out;
4658     }
4659     if (!drv->create_opts) {
4660         error_report("Format driver '%s' does not support image creation",
4661                      drv->format_name);
4662         ret = -1;
4663         goto out;
4664     }
4665     if (!proto_drv->create_opts) {
4666         error_report("Protocol driver '%s' does not support image creation",
4667                      proto_drv->format_name);
4668         ret = -1;
4669         goto out;
4670     }
4671     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4672     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
4673 
4674     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4675 
4676     size = blk_getlength(blk1);
4677     if (size < 0) {
4678         error_report("Failed to get size for '%s'", in.filename);
4679         ret = -1;
4680         goto out;
4681     }
4682 
4683     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
4684         dd.count * in.bsz < size) {
4685         size = dd.count * in.bsz;
4686     }
4687 
4688     /* Overflow means the specified offset is beyond input image's size */
4689     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4690                               size < in.bsz * in.offset)) {
4691         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
4692     } else {
4693         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
4694                             size - in.bsz * in.offset, &error_abort);
4695     }
4696 
4697     ret = bdrv_create(drv, out.filename, opts, &local_err);
4698     if (ret < 0) {
4699         error_reportf_err(local_err,
4700                           "%s: error while creating output image: ",
4701                           out.filename);
4702         ret = -1;
4703         goto out;
4704     }
4705 
4706     /* TODO, we can't honour --image-opts for the target,
4707      * since it needs to be given in a format compatible
4708      * with the bdrv_create() call above which does not
4709      * support image-opts style.
4710      */
4711     blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
4712                          false, false, false);
4713 
4714     if (!blk2) {
4715         ret = -1;
4716         goto out;
4717     }
4718 
4719     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4720                               size < in.offset * in.bsz)) {
4721         /* We give a warning if the skip option is bigger than the input
4722          * size and create an empty output disk image (i.e. like dd(1)).
4723          */
4724         error_report("%s: cannot skip to specified offset", in.filename);
4725         in_pos = size;
4726     } else {
4727         in_pos = in.offset * in.bsz;
4728     }
4729 
4730     in.buf = g_new(uint8_t, in.bsz);
4731 
4732     for (out_pos = 0; in_pos < size; block_count++) {
4733         int in_ret, out_ret;
4734 
4735         if (in_pos + in.bsz > size) {
4736             in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
4737         } else {
4738             in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
4739         }
4740         if (in_ret < 0) {
4741             error_report("error while reading from input image file: %s",
4742                          strerror(-in_ret));
4743             ret = -1;
4744             goto out;
4745         }
4746         in_pos += in_ret;
4747 
4748         out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
4749 
4750         if (out_ret < 0) {
4751             error_report("error while writing to output image file: %s",
4752                          strerror(-out_ret));
4753             ret = -1;
4754             goto out;
4755         }
4756         out_pos += out_ret;
4757     }
4758 
4759 out:
4760     g_free(arg);
4761     qemu_opts_del(opts);
4762     qemu_opts_free(create_opts);
4763     blk_unref(blk1);
4764     blk_unref(blk2);
4765     g_free(in.filename);
4766     g_free(out.filename);
4767     g_free(in.buf);
4768     g_free(out.buf);
4769 
4770     if (ret) {
4771         return 1;
4772     }
4773     return 0;
4774 }
4775 
4776 static void dump_json_block_measure_info(BlockMeasureInfo *info)
4777 {
4778     QString *str;
4779     QObject *obj;
4780     Visitor *v = qobject_output_visitor_new(&obj);
4781 
4782     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
4783     visit_complete(v, &obj);
4784     str = qobject_to_json_pretty(obj);
4785     assert(str != NULL);
4786     printf("%s\n", qstring_get_str(str));
4787     qobject_unref(obj);
4788     visit_free(v);
4789     qobject_unref(str);
4790 }
4791 
4792 static int img_measure(int argc, char **argv)
4793 {
4794     static const struct option long_options[] = {
4795         {"help", no_argument, 0, 'h'},
4796         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4797         {"object", required_argument, 0, OPTION_OBJECT},
4798         {"output", required_argument, 0, OPTION_OUTPUT},
4799         {"size", required_argument, 0, OPTION_SIZE},
4800         {"force-share", no_argument, 0, 'U'},
4801         {0, 0, 0, 0}
4802     };
4803     OutputFormat output_format = OFORMAT_HUMAN;
4804     BlockBackend *in_blk = NULL;
4805     BlockDriver *drv;
4806     const char *filename = NULL;
4807     const char *fmt = NULL;
4808     const char *out_fmt = "raw";
4809     char *options = NULL;
4810     char *snapshot_name = NULL;
4811     bool force_share = false;
4812     QemuOpts *opts = NULL;
4813     QemuOpts *object_opts = NULL;
4814     QemuOpts *sn_opts = NULL;
4815     QemuOptsList *create_opts = NULL;
4816     bool image_opts = false;
4817     uint64_t img_size = UINT64_MAX;
4818     BlockMeasureInfo *info = NULL;
4819     Error *local_err = NULL;
4820     int ret = 1;
4821     int c;
4822 
4823     while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
4824                             long_options, NULL)) != -1) {
4825         switch (c) {
4826         case '?':
4827         case 'h':
4828             help();
4829             break;
4830         case 'f':
4831             fmt = optarg;
4832             break;
4833         case 'O':
4834             out_fmt = optarg;
4835             break;
4836         case 'o':
4837             if (!is_valid_option_list(optarg)) {
4838                 error_report("Invalid option list: %s", optarg);
4839                 goto out;
4840             }
4841             if (!options) {
4842                 options = g_strdup(optarg);
4843             } else {
4844                 char *old_options = options;
4845                 options = g_strdup_printf("%s,%s", options, optarg);
4846                 g_free(old_options);
4847             }
4848             break;
4849         case 'l':
4850             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
4851                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
4852                                                   optarg, false);
4853                 if (!sn_opts) {
4854                     error_report("Failed in parsing snapshot param '%s'",
4855                                  optarg);
4856                     goto out;
4857                 }
4858             } else {
4859                 snapshot_name = optarg;
4860             }
4861             break;
4862         case 'U':
4863             force_share = true;
4864             break;
4865         case OPTION_OBJECT:
4866             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
4867                                                   optarg, true);
4868             if (!object_opts) {
4869                 goto out;
4870             }
4871             break;
4872         case OPTION_IMAGE_OPTS:
4873             image_opts = true;
4874             break;
4875         case OPTION_OUTPUT:
4876             if (!strcmp(optarg, "json")) {
4877                 output_format = OFORMAT_JSON;
4878             } else if (!strcmp(optarg, "human")) {
4879                 output_format = OFORMAT_HUMAN;
4880             } else {
4881                 error_report("--output must be used with human or json "
4882                              "as argument.");
4883                 goto out;
4884             }
4885             break;
4886         case OPTION_SIZE:
4887         {
4888             int64_t sval;
4889 
4890             sval = cvtnum(optarg);
4891             if (sval < 0) {
4892                 if (sval == -ERANGE) {
4893                     error_report("Image size must be less than 8 EiB!");
4894                 } else {
4895                     error_report("Invalid image size specified! You may use "
4896                                  "k, M, G, T, P or E suffixes for ");
4897                     error_report("kilobytes, megabytes, gigabytes, terabytes, "
4898                                  "petabytes and exabytes.");
4899                 }
4900                 goto out;
4901             }
4902             img_size = (uint64_t)sval;
4903         }
4904         break;
4905         }
4906     }
4907 
4908     if (qemu_opts_foreach(&qemu_object_opts,
4909                           user_creatable_add_opts_foreach,
4910                           NULL, &error_fatal)) {
4911         goto out;
4912     }
4913 
4914     if (argc - optind > 1) {
4915         error_report("At most one filename argument is allowed.");
4916         goto out;
4917     } else if (argc - optind == 1) {
4918         filename = argv[optind];
4919     }
4920 
4921     if (!filename &&
4922         (object_opts || image_opts || fmt || snapshot_name || sn_opts)) {
4923         error_report("--object, --image-opts, -f, and -l "
4924                      "require a filename argument.");
4925         goto out;
4926     }
4927     if (filename && img_size != UINT64_MAX) {
4928         error_report("--size N cannot be used together with a filename.");
4929         goto out;
4930     }
4931     if (!filename && img_size == UINT64_MAX) {
4932         error_report("Either --size N or one filename must be specified.");
4933         goto out;
4934     }
4935 
4936     if (filename) {
4937         in_blk = img_open(image_opts, filename, fmt, 0,
4938                           false, false, force_share);
4939         if (!in_blk) {
4940             goto out;
4941         }
4942 
4943         if (sn_opts) {
4944             bdrv_snapshot_load_tmp(blk_bs(in_blk),
4945                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
4946                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
4947                     &local_err);
4948         } else if (snapshot_name != NULL) {
4949             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
4950                     snapshot_name, &local_err);
4951         }
4952         if (local_err) {
4953             error_reportf_err(local_err, "Failed to load snapshot: ");
4954             goto out;
4955         }
4956     }
4957 
4958     drv = bdrv_find_format(out_fmt);
4959     if (!drv) {
4960         error_report("Unknown file format '%s'", out_fmt);
4961         goto out;
4962     }
4963     if (!drv->create_opts) {
4964         error_report("Format driver '%s' does not support image creation",
4965                      drv->format_name);
4966         goto out;
4967     }
4968 
4969     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4970     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
4971     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4972     if (options) {
4973         qemu_opts_do_parse(opts, options, NULL, &local_err);
4974         if (local_err) {
4975             error_report_err(local_err);
4976             error_report("Invalid options for file format '%s'", out_fmt);
4977             goto out;
4978         }
4979     }
4980     if (img_size != UINT64_MAX) {
4981         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
4982     }
4983 
4984     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
4985     if (local_err) {
4986         error_report_err(local_err);
4987         goto out;
4988     }
4989 
4990     if (output_format == OFORMAT_HUMAN) {
4991         printf("required size: %" PRIu64 "\n", info->required);
4992         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
4993     } else {
4994         dump_json_block_measure_info(info);
4995     }
4996 
4997     ret = 0;
4998 
4999 out:
5000     qapi_free_BlockMeasureInfo(info);
5001     qemu_opts_del(object_opts);
5002     qemu_opts_del(opts);
5003     qemu_opts_del(sn_opts);
5004     qemu_opts_free(create_opts);
5005     g_free(options);
5006     blk_unref(in_blk);
5007     return ret;
5008 }
5009 
5010 static const img_cmd_t img_cmds[] = {
5011 #define DEF(option, callback, arg_string)        \
5012     { option, callback },
5013 #include "qemu-img-cmds.h"
5014 #undef DEF
5015     { NULL, NULL, },
5016 };
5017 
5018 int main(int argc, char **argv)
5019 {
5020     const img_cmd_t *cmd;
5021     const char *cmdname;
5022     Error *local_error = NULL;
5023     char *trace_file = NULL;
5024     int c;
5025     static const struct option long_options[] = {
5026         {"help", no_argument, 0, 'h'},
5027         {"version", no_argument, 0, 'V'},
5028         {"trace", required_argument, NULL, 'T'},
5029         {0, 0, 0, 0}
5030     };
5031 
5032 #ifdef CONFIG_POSIX
5033     signal(SIGPIPE, SIG_IGN);
5034 #endif
5035 
5036     error_init(argv[0]);
5037     module_call_init(MODULE_INIT_TRACE);
5038     qemu_init_exec_dir(argv[0]);
5039 
5040     if (qemu_init_main_loop(&local_error)) {
5041         error_report_err(local_error);
5042         exit(EXIT_FAILURE);
5043     }
5044 
5045     qcrypto_init(&error_fatal);
5046 
5047     module_call_init(MODULE_INIT_QOM);
5048     bdrv_init();
5049     if (argc < 2) {
5050         error_exit("Not enough arguments");
5051     }
5052 
5053     qemu_add_opts(&qemu_object_opts);
5054     qemu_add_opts(&qemu_source_opts);
5055     qemu_add_opts(&qemu_trace_opts);
5056 
5057     while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5058         switch (c) {
5059         case ':':
5060             missing_argument(argv[optind - 1]);
5061             return 0;
5062         case '?':
5063             unrecognized_option(argv[optind - 1]);
5064             return 0;
5065         case 'h':
5066             help();
5067             return 0;
5068         case 'V':
5069             printf(QEMU_IMG_VERSION);
5070             return 0;
5071         case 'T':
5072             g_free(trace_file);
5073             trace_file = trace_opt_parse(optarg);
5074             break;
5075         }
5076     }
5077 
5078     cmdname = argv[optind];
5079 
5080     /* reset getopt_long scanning */
5081     argc -= optind;
5082     if (argc < 1) {
5083         return 0;
5084     }
5085     argv += optind;
5086     qemu_reset_optind();
5087 
5088     if (!trace_init_backends()) {
5089         exit(1);
5090     }
5091     trace_init_file(trace_file);
5092     qemu_set_log(LOG_TRACE);
5093 
5094     /* find the command */
5095     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5096         if (!strcmp(cmdname, cmd->name)) {
5097             return cmd->handler(argc, argv);
5098         }
5099     }
5100 
5101     /* not found */
5102     error_exit("Command not found: %s", cmdname);
5103 }
5104