xref: /openbmc/qemu/qemu-img.c (revision 2fc979cb)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu-common.h"
29 #include "qemu-version.h"
30 #include "qapi/error.h"
31 #include "qapi/qapi-visit-block-core.h"
32 #include "qapi/qobject-output-visitor.h"
33 #include "qapi/qmp/qjson.h"
34 #include "qapi/qmp/qdict.h"
35 #include "qapi/qmp/qstring.h"
36 #include "qemu/cutils.h"
37 #include "qemu/config-file.h"
38 #include "qemu/option.h"
39 #include "qemu/error-report.h"
40 #include "qemu/log.h"
41 #include "qemu/main-loop.h"
42 #include "qemu/module.h"
43 #include "qemu/units.h"
44 #include "qom/object_interfaces.h"
45 #include "sysemu/block-backend.h"
46 #include "block/block_int.h"
47 #include "block/blockjob.h"
48 #include "block/qapi.h"
49 #include "crypto/init.h"
50 #include "trace/control.h"
51 
/*
 * Version banner: program name, full version string and copyright line,
 * each terminated by a newline.  help() prints it as the first part of the
 * usage text.
 */
#define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
                          "\n" QEMU_COPYRIGHT "\n"
54 
/*
 * One entry pairing a name with the function that implements it.
 * NOTE(review): presumably one entry per qemu-img sub-command; the table
 * that uses this type is outside this part of the file.
 */
typedef struct img_cmd_t {
    const char *name;                       /* name used to look the entry up */
    int (*handler)(int argc, char **argv);  /* receives an argc/argv pair,
                                               returns an int status */
} img_cmd_t;
59 
/*
 * Codes returned by getopt_long() for long options that have no
 * single-character equivalent.  They start at 256, presumably so that they
 * can never collide with the character codes used for short options in the
 * same switch statements.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN = 257,
    OPTION_OBJECT = 258,
    OPTION_IMAGE_OPTS = 259,
    OPTION_PATTERN = 260,
    OPTION_FLUSH_INTERVAL = 261,
    OPTION_NO_DRAIN = 262,
    OPTION_TARGET_IMAGE_OPTS = 263,
    OPTION_SIZE = 264,
    OPTION_PREALLOCATION = 265,
    OPTION_SHRINK = 266,
    OPTION_SALVAGE = 267,
    OPTION_TARGET_IS_ZERO = 268,
};
75 
/* Output style selected with --output ("json" or "human"). */
typedef enum OutputFormat {
    OFORMAT_JSON,   /* chosen by --output json */
    OFORMAT_HUMAN,  /* chosen by --output human; img_check()'s default */
} OutputFormat;
80 
/*
 * Default to cache=writeback as data integrity is not important for qemu-img.
 * This is the cache mode string used when the user does not pass one
 * (see the initialisation of 'cache' in img_check()).
 */
#define BDRV_DEFAULT_CACHE "writeback"
83 
/*
 * Callback passed to bdrv_iterate_format() by help(): prints one format
 * name preceded by a single space.  @opaque is not used.
 */
static void format_print(void *opaque, const char *name)
{
    printf(" %s", name);
}
88 
89 static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
90 {
91     va_list ap;
92 
93     va_start(ap, fmt);
94     error_vreport(fmt, ap);
95     va_end(ap);
96 
97     error_printf("Try 'qemu-img --help' for more information\n");
98     exit(EXIT_FAILURE);
99 }
100 
/*
 * Abort via error_exit() because @option was given without its required
 * argument.  Never returns.
 */
static void QEMU_NORETURN missing_argument(const char *option)
{
    error_exit("missing argument for option '%s'", option);
}
105 
/*
 * Abort via error_exit() because @option is not a known option.
 * Never returns.
 */
static void QEMU_NORETURN unrecognized_option(const char *option)
{
    error_exit("unrecognized option '%s'", option);
}
110 
111 /* Please keep in synch with qemu-img.texi */
112 static void QEMU_NORETURN help(void)
113 {
114     const char *help_msg =
115            QEMU_IMG_VERSION
116            "usage: qemu-img [standard options] command [command options]\n"
117            "QEMU disk image utility\n"
118            "\n"
119            "    '-h', '--help'       display this help and exit\n"
120            "    '-V', '--version'    output version information and exit\n"
121            "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
122            "                         specify tracing options\n"
123            "\n"
124            "Command syntax:\n"
125 #define DEF(option, callback, arg_string)        \
126            "  " arg_string "\n"
127 #include "qemu-img-cmds.h"
128 #undef DEF
129            "\n"
130            "Command parameters:\n"
131            "  'filename' is a disk image filename\n"
132            "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
133            "    manual page for a description of the object properties. The most common\n"
134            "    object type is a 'secret', which is used to supply passwords and/or\n"
135            "    encryption keys.\n"
136            "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
137            "  'cache' is the cache mode used to write the output disk image, the valid\n"
138            "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
139            "    'directsync' and 'unsafe' (default for convert)\n"
140            "  'src_cache' is the cache mode used to read input disk images, the valid\n"
141            "    options are the same as for the 'cache' option\n"
142            "  'size' is the disk image size in bytes. Optional suffixes\n"
143            "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
144            "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
145            "    supported. 'b' is ignored.\n"
146            "  'output_filename' is the destination disk image filename\n"
147            "  'output_fmt' is the destination format\n"
148            "  'options' is a comma separated list of format specific options in a\n"
149            "    name=value format. Use -o ? for an overview of the options supported by the\n"
150            "    used format\n"
151            "  'snapshot_param' is param used for internal snapshot, format\n"
152            "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
153            "    '[ID_OR_NAME]'\n"
154            "  '-c' indicates that target image must be compressed (qcow format only)\n"
155            "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
156            "       new backing file match exactly. The image doesn't need a working\n"
157            "       backing file before rebasing in this case (useful for renaming the\n"
158            "       backing file). For image creation, allow creating without attempting\n"
159            "       to open the backing file.\n"
160            "  '-h' with or without a command shows this help and lists the supported formats\n"
161            "  '-p' show progress of command (only certain commands)\n"
162            "  '-q' use Quiet mode - do not print any output (except errors)\n"
163            "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
164            "       contain only zeros for qemu-img to create a sparse image during\n"
165            "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
166            "       unallocated or zero sectors, and the destination image will always be\n"
167            "       fully allocated\n"
168            "  '--output' takes the format in which the output must be done (human or json)\n"
169            "  '-n' skips the target volume creation (useful if the volume is created\n"
170            "       prior to running qemu-img)\n"
171            "\n"
172            "Parameters to check subcommand:\n"
173            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
174            "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
175            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
176            "       hiding corruption that has already occurred.\n"
177            "\n"
178            "Parameters to convert subcommand:\n"
179            "  '-m' specifies how many coroutines work in parallel during the convert\n"
180            "       process (defaults to 8)\n"
181            "  '-W' allow to write to the target out of order rather than sequential\n"
182            "\n"
183            "Parameters to snapshot subcommand:\n"
184            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
185            "  '-a' applies a snapshot (revert disk to saved state)\n"
186            "  '-c' creates a snapshot\n"
187            "  '-d' deletes a snapshot\n"
188            "  '-l' lists all snapshots in the given image\n"
189            "\n"
190            "Parameters to compare subcommand:\n"
191            "  '-f' first image format\n"
192            "  '-F' second image format\n"
193            "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
194            "\n"
195            "Parameters to dd subcommand:\n"
196            "  'bs=BYTES' read and write up to BYTES bytes at a time "
197            "(default: 512)\n"
198            "  'count=N' copy only N input blocks\n"
199            "  'if=FILE' read from FILE\n"
200            "  'of=FILE' write to FILE\n"
201            "  'skip=N' skip N bs-sized blocks at the start of input\n";
202 
203     printf("%s\nSupported formats:", help_msg);
204     bdrv_iterate_format(format_print, NULL, false);
205     printf("\n\n" QEMU_HELP_BOTTOM "\n");
206     exit(EXIT_SUCCESS);
207 }
208 
/*
 * Option list that collects every --object definition given on the command
 * line (the sub-commands parse into it with qemu_opts_parse_noisily() and
 * later walk it with qemu_opts_foreach()).  A value given without a key is
 * taken as "qom-type".
 */
static QemuOptsList qemu_object_opts = {
    .name = "object",
    .implied_opt_name = "qom-type",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
    .desc = {
        /* NOTE(review): empty descriptor table - presumably means any
         * option name is accepted; confirm against QemuOptsList docs. */
        { }
    },
};
217 
218 static bool qemu_img_object_print_help(const char *type, QemuOpts *opts)
219 {
220     if (user_creatable_print_help(type, opts)) {
221         exit(0);
222     }
223     return true;
224 }
225 
/*
 * Can @optarg be handed to accumulate_options()?
 *
 * accumulate_options() joins several option strings with ','.  For the
 * joined string to stay unambiguous, each piece
 *  - must not be empty (a separator before it could otherwise escape the
 *    separator after it),
 *  - must not begin with ',' (it would escape the separator before it),
 *  - must not end in an odd number of ',' (the last one would escape the
 *    separator after it).
 *
 * Returns true when all three conditions hold.
 */
static bool is_valid_option_list(const char *optarg)
{
    const char *end;
    size_t trailing_commas = 0;

    if (optarg[0] == '\0' || optarg[0] == ',') {
        return false;
    }

    /* Walk backwards from the terminator, counting the run of commas. */
    end = optarg + strlen(optarg);
    while (end > optarg && end[-1] == ',') {
        trailing_commas++;
        end--;
    }

    return trailing_commas % 2 == 0;
}
253 
/*
 * Append @optarg to the comma-separated option string *@options.
 *
 * *@options may be NULL (nothing accumulated yet); on success it points to
 * a newly allocated string and the previous one has been freed.
 *
 * Returns 0 on success, or -1 after reporting an error when @optarg fails
 * is_valid_option_list() (in which case *@options is left untouched).
 */
static int accumulate_options(char **options, char *optarg)
{
    char *joined;

    if (!is_valid_option_list(optarg)) {
        error_report("Invalid option list: %s", optarg);
        return -1;
    }

    if (*options == NULL) {
        joined = g_strdup(optarg);
    } else {
        joined = g_strdup_printf("%s,%s", *options, optarg);
        g_free(*options);
    }
    *options = joined;

    return 0;
}
272 
/*
 * Option list named "source".  img_open() looks up a list of that name via
 * qemu_find_opts("source") to parse --image-opts strings; a value given
 * without a key is taken as "file".
 * NOTE(review): the registration of this list is not in this part of the
 * file - confirm it is the one img_open() finds.
 */
static QemuOptsList qemu_source_opts = {
    .name = "source",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
    .desc = {
        { }
    },
};
281 
282 static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
283 {
284     int ret = 0;
285     if (!quiet) {
286         va_list args;
287         va_start(args, fmt);
288         ret = vprintf(fmt, args);
289         va_end(args);
290     }
291     return ret;
292 }
293 
294 
295 static int print_block_option_help(const char *filename, const char *fmt)
296 {
297     BlockDriver *drv, *proto_drv;
298     QemuOptsList *create_opts = NULL;
299     Error *local_err = NULL;
300 
301     /* Find driver and parse its options */
302     drv = bdrv_find_format(fmt);
303     if (!drv) {
304         error_report("Unknown file format '%s'", fmt);
305         return 1;
306     }
307 
308     if (!drv->create_opts) {
309         error_report("Format driver '%s' does not support image creation", fmt);
310         return 1;
311     }
312 
313     create_opts = qemu_opts_append(create_opts, drv->create_opts);
314     if (filename) {
315         proto_drv = bdrv_find_protocol(filename, true, &local_err);
316         if (!proto_drv) {
317             error_report_err(local_err);
318             qemu_opts_free(create_opts);
319             return 1;
320         }
321         if (!proto_drv->create_opts) {
322             error_report("Protocol driver '%s' does not support image creation",
323                          proto_drv->format_name);
324             qemu_opts_free(create_opts);
325             return 1;
326         }
327         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
328     }
329 
330     if (filename) {
331         printf("Supported options:\n");
332     } else {
333         printf("Supported %s options:\n", fmt);
334     }
335     qemu_opts_print_help(create_opts, false);
336     qemu_opts_free(create_opts);
337 
338     if (!filename) {
339         printf("\n"
340                "The protocol level may support further options.\n"
341                "Specify the target filename to include those options.\n");
342     }
343 
344     return 0;
345 }
346 
347 
348 static BlockBackend *img_open_opts(const char *optstr,
349                                    QemuOpts *opts, int flags, bool writethrough,
350                                    bool quiet, bool force_share)
351 {
352     QDict *options;
353     Error *local_err = NULL;
354     BlockBackend *blk;
355     options = qemu_opts_to_qdict(opts, NULL);
356     if (force_share) {
357         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
358             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
359             error_report("--force-share/-U conflicts with image options");
360             qobject_unref(options);
361             return NULL;
362         }
363         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
364     }
365     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
366     if (!blk) {
367         error_reportf_err(local_err, "Could not open '%s': ", optstr);
368         return NULL;
369     }
370     blk_set_enable_write_cache(blk, !writethrough);
371 
372     return blk;
373 }
374 
375 static BlockBackend *img_open_file(const char *filename,
376                                    QDict *options,
377                                    const char *fmt, int flags,
378                                    bool writethrough, bool quiet,
379                                    bool force_share)
380 {
381     BlockBackend *blk;
382     Error *local_err = NULL;
383 
384     if (!options) {
385         options = qdict_new();
386     }
387     if (fmt) {
388         qdict_put_str(options, "driver", fmt);
389     }
390 
391     if (force_share) {
392         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
393     }
394     blk = blk_new_open(filename, NULL, options, flags, &local_err);
395     if (!blk) {
396         error_reportf_err(local_err, "Could not open '%s': ", filename);
397         return NULL;
398     }
399     blk_set_enable_write_cache(blk, !writethrough);
400 
401     return blk;
402 }
403 
404 
405 static int img_add_key_secrets(void *opaque,
406                                const char *name, const char *value,
407                                Error **errp)
408 {
409     QDict *options = opaque;
410 
411     if (g_str_has_suffix(name, "key-secret")) {
412         qdict_put_str(options, name, value);
413     }
414 
415     return 0;
416 }
417 
418 
419 static BlockBackend *img_open(bool image_opts,
420                               const char *filename,
421                               const char *fmt, int flags, bool writethrough,
422                               bool quiet, bool force_share)
423 {
424     BlockBackend *blk;
425     if (image_opts) {
426         QemuOpts *opts;
427         if (fmt) {
428             error_report("--image-opts and --format are mutually exclusive");
429             return NULL;
430         }
431         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
432                                        filename, true);
433         if (!opts) {
434             return NULL;
435         }
436         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
437                             force_share);
438     } else {
439         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
440                             force_share);
441     }
442     return blk;
443 }
444 
445 
446 static int add_old_style_options(const char *fmt, QemuOpts *opts,
447                                  const char *base_filename,
448                                  const char *base_fmt)
449 {
450     Error *err = NULL;
451 
452     if (base_filename) {
453         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
454         if (err) {
455             error_report("Backing file not supported for file format '%s'",
456                          fmt);
457             error_free(err);
458             return -1;
459         }
460     }
461     if (base_fmt) {
462         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
463         if (err) {
464             error_report("Backing file format not supported for file "
465                          "format '%s'", fmt);
466             error_free(err);
467             return -1;
468         }
469     }
470     return 0;
471 }
472 
/*
 * Convert size string @s to a number using qemu_strtosz().
 *
 * Returns the non-negative value on success, the negative error code from
 * qemu_strtosz() if parsing fails, or -ERANGE if the parsed value does not
 * fit in int64_t.
 */
static int64_t cvtnum(const char *s)
{
    uint64_t parsed;
    int rc = qemu_strtosz(s, NULL, &parsed);

    if (rc < 0) {
        return rc;
    }

    return parsed > INT64_MAX ? -ERANGE : (int64_t)parsed;
}
487 
488 static int img_create(int argc, char **argv)
489 {
490     int c;
491     uint64_t img_size = -1;
492     const char *fmt = "raw";
493     const char *base_fmt = NULL;
494     const char *filename;
495     const char *base_filename = NULL;
496     char *options = NULL;
497     Error *local_err = NULL;
498     bool quiet = false;
499     int flags = 0;
500 
501     for(;;) {
502         static const struct option long_options[] = {
503             {"help", no_argument, 0, 'h'},
504             {"object", required_argument, 0, OPTION_OBJECT},
505             {0, 0, 0, 0}
506         };
507         c = getopt_long(argc, argv, ":F:b:f:ho:qu",
508                         long_options, NULL);
509         if (c == -1) {
510             break;
511         }
512         switch(c) {
513         case ':':
514             missing_argument(argv[optind - 1]);
515             break;
516         case '?':
517             unrecognized_option(argv[optind - 1]);
518             break;
519         case 'h':
520             help();
521             break;
522         case 'F':
523             base_fmt = optarg;
524             break;
525         case 'b':
526             base_filename = optarg;
527             break;
528         case 'f':
529             fmt = optarg;
530             break;
531         case 'o':
532             if (accumulate_options(&options, optarg) < 0) {
533                 goto fail;
534             }
535             break;
536         case 'q':
537             quiet = true;
538             break;
539         case 'u':
540             flags |= BDRV_O_NO_BACKING;
541             break;
542         case OPTION_OBJECT: {
543             QemuOpts *opts;
544             opts = qemu_opts_parse_noisily(&qemu_object_opts,
545                                            optarg, true);
546             if (!opts) {
547                 goto fail;
548             }
549         }   break;
550         }
551     }
552 
553     /* Get the filename */
554     filename = (optind < argc) ? argv[optind] : NULL;
555     if (options && has_help_option(options)) {
556         g_free(options);
557         return print_block_option_help(filename, fmt);
558     }
559 
560     if (optind >= argc) {
561         error_exit("Expecting image file name");
562     }
563     optind++;
564 
565     if (qemu_opts_foreach(&qemu_object_opts,
566                           user_creatable_add_opts_foreach,
567                           qemu_img_object_print_help, &error_fatal)) {
568         goto fail;
569     }
570 
571     /* Get image size, if specified */
572     if (optind < argc) {
573         int64_t sval;
574 
575         sval = cvtnum(argv[optind++]);
576         if (sval < 0) {
577             if (sval == -ERANGE) {
578                 error_report("Image size must be less than 8 EiB!");
579             } else {
580                 error_report("Invalid image size specified! You may use k, M, "
581                       "G, T, P or E suffixes for ");
582                 error_report("kilobytes, megabytes, gigabytes, terabytes, "
583                              "petabytes and exabytes.");
584             }
585             goto fail;
586         }
587         img_size = (uint64_t)sval;
588     }
589     if (optind != argc) {
590         error_exit("Unexpected argument: %s", argv[optind]);
591     }
592 
593     bdrv_img_create(filename, fmt, base_filename, base_fmt,
594                     options, img_size, flags, quiet, &local_err);
595     if (local_err) {
596         error_reportf_err(local_err, "%s: ", filename);
597         goto fail;
598     }
599 
600     g_free(options);
601     return 0;
602 
603 fail:
604     g_free(options);
605     return 1;
606 }
607 
608 static void dump_json_image_check(ImageCheck *check, bool quiet)
609 {
610     QString *str;
611     QObject *obj;
612     Visitor *v = qobject_output_visitor_new(&obj);
613 
614     visit_type_ImageCheck(v, NULL, &check, &error_abort);
615     visit_complete(v, &obj);
616     str = qobject_to_json_pretty(obj);
617     assert(str != NULL);
618     qprintf(quiet, "%s\n", qstring_get_str(str));
619     qobject_unref(obj);
620     visit_free(v);
621     qobject_unref(str);
622 }
623 
624 static void dump_human_image_check(ImageCheck *check, bool quiet)
625 {
626     if (!(check->corruptions || check->leaks || check->check_errors)) {
627         qprintf(quiet, "No errors were found on the image.\n");
628     } else {
629         if (check->corruptions) {
630             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
631                     "Data may be corrupted, or further writes to the image "
632                     "may corrupt it.\n",
633                     check->corruptions);
634         }
635 
636         if (check->leaks) {
637             qprintf(quiet,
638                     "\n%" PRId64 " leaked clusters were found on the image.\n"
639                     "This means waste of disk space, but no harm to data.\n",
640                     check->leaks);
641         }
642 
643         if (check->check_errors) {
644             qprintf(quiet,
645                     "\n%" PRId64
646                     " internal errors have occurred during the check.\n",
647                     check->check_errors);
648         }
649     }
650 
651     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
652         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
653                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
654                 check->allocated_clusters, check->total_clusters,
655                 check->allocated_clusters * 100.0 / check->total_clusters,
656                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
657                 check->compressed_clusters * 100.0 /
658                 check->allocated_clusters);
659     }
660 
661     if (check->image_end_offset) {
662         qprintf(quiet,
663                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
664     }
665 }
666 
667 static int collect_image_check(BlockDriverState *bs,
668                    ImageCheck *check,
669                    const char *filename,
670                    const char *fmt,
671                    int fix)
672 {
673     int ret;
674     BdrvCheckResult result;
675 
676     ret = bdrv_check(bs, &result, fix);
677     if (ret < 0) {
678         return ret;
679     }
680 
681     check->filename                 = g_strdup(filename);
682     check->format                   = g_strdup(bdrv_get_format_name(bs));
683     check->check_errors             = result.check_errors;
684     check->corruptions              = result.corruptions;
685     check->has_corruptions          = result.corruptions != 0;
686     check->leaks                    = result.leaks;
687     check->has_leaks                = result.leaks != 0;
688     check->corruptions_fixed        = result.corruptions_fixed;
689     check->has_corruptions_fixed    = result.corruptions_fixed != 0;
690     check->leaks_fixed              = result.leaks_fixed;
691     check->has_leaks_fixed          = result.leaks_fixed != 0;
692     check->image_end_offset         = result.image_end_offset;
693     check->has_image_end_offset     = result.image_end_offset != 0;
694     check->total_clusters           = result.bfi.total_clusters;
695     check->has_total_clusters       = result.bfi.total_clusters != 0;
696     check->allocated_clusters       = result.bfi.allocated_clusters;
697     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
698     check->fragmented_clusters      = result.bfi.fragmented_clusters;
699     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
700     check->compressed_clusters      = result.bfi.compressed_clusters;
701     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
702 
703     return 0;
704 }
705 
706 /*
707  * Checks an image for consistency. Exit codes:
708  *
709  *  0 - Check completed, image is good
710  *  1 - Check not completed because of internal errors
711  *  2 - Check completed, image is corrupted
712  *  3 - Check completed, image has leaked clusters, but is good otherwise
713  * 63 - Checks are not supported by the image format
714  */
715 static int img_check(int argc, char **argv)
716 {
717     int c, ret;
718     OutputFormat output_format = OFORMAT_HUMAN;
719     const char *filename, *fmt, *output, *cache;
720     BlockBackend *blk;
721     BlockDriverState *bs;
722     int fix = 0;
723     int flags = BDRV_O_CHECK;
724     bool writethrough;
725     ImageCheck *check;
726     bool quiet = false;
727     bool image_opts = false;
728     bool force_share = false;
729 
730     fmt = NULL;
731     output = NULL;
732     cache = BDRV_DEFAULT_CACHE;
733 
734     for(;;) {
735         int option_index = 0;
736         static const struct option long_options[] = {
737             {"help", no_argument, 0, 'h'},
738             {"format", required_argument, 0, 'f'},
739             {"repair", required_argument, 0, 'r'},
740             {"output", required_argument, 0, OPTION_OUTPUT},
741             {"object", required_argument, 0, OPTION_OBJECT},
742             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
743             {"force-share", no_argument, 0, 'U'},
744             {0, 0, 0, 0}
745         };
746         c = getopt_long(argc, argv, ":hf:r:T:qU",
747                         long_options, &option_index);
748         if (c == -1) {
749             break;
750         }
751         switch(c) {
752         case ':':
753             missing_argument(argv[optind - 1]);
754             break;
755         case '?':
756             unrecognized_option(argv[optind - 1]);
757             break;
758         case 'h':
759             help();
760             break;
761         case 'f':
762             fmt = optarg;
763             break;
764         case 'r':
765             flags |= BDRV_O_RDWR;
766 
767             if (!strcmp(optarg, "leaks")) {
768                 fix = BDRV_FIX_LEAKS;
769             } else if (!strcmp(optarg, "all")) {
770                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
771             } else {
772                 error_exit("Unknown option value for -r "
773                            "(expecting 'leaks' or 'all'): %s", optarg);
774             }
775             break;
776         case OPTION_OUTPUT:
777             output = optarg;
778             break;
779         case 'T':
780             cache = optarg;
781             break;
782         case 'q':
783             quiet = true;
784             break;
785         case 'U':
786             force_share = true;
787             break;
788         case OPTION_OBJECT: {
789             QemuOpts *opts;
790             opts = qemu_opts_parse_noisily(&qemu_object_opts,
791                                            optarg, true);
792             if (!opts) {
793                 return 1;
794             }
795         }   break;
796         case OPTION_IMAGE_OPTS:
797             image_opts = true;
798             break;
799         }
800     }
801     if (optind != argc - 1) {
802         error_exit("Expecting one image file name");
803     }
804     filename = argv[optind++];
805 
806     if (output && !strcmp(output, "json")) {
807         output_format = OFORMAT_JSON;
808     } else if (output && !strcmp(output, "human")) {
809         output_format = OFORMAT_HUMAN;
810     } else if (output) {
811         error_report("--output must be used with human or json as argument.");
812         return 1;
813     }
814 
815     if (qemu_opts_foreach(&qemu_object_opts,
816                           user_creatable_add_opts_foreach,
817                           qemu_img_object_print_help, &error_fatal)) {
818         return 1;
819     }
820 
821     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
822     if (ret < 0) {
823         error_report("Invalid source cache option: %s", cache);
824         return 1;
825     }
826 
827     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
828                    force_share);
829     if (!blk) {
830         return 1;
831     }
832     bs = blk_bs(blk);
833 
834     check = g_new0(ImageCheck, 1);
835     ret = collect_image_check(bs, check, filename, fmt, fix);
836 
837     if (ret == -ENOTSUP) {
838         error_report("This image format does not support checks");
839         ret = 63;
840         goto fail;
841     }
842 
843     if (check->corruptions_fixed || check->leaks_fixed) {
844         int corruptions_fixed, leaks_fixed;
845         bool has_leaks_fixed, has_corruptions_fixed;
846 
847         leaks_fixed         = check->leaks_fixed;
848         has_leaks_fixed     = check->has_leaks_fixed;
849         corruptions_fixed   = check->corruptions_fixed;
850         has_corruptions_fixed = check->has_corruptions_fixed;
851 
852         if (output_format == OFORMAT_HUMAN) {
853             qprintf(quiet,
854                     "The following inconsistencies were found and repaired:\n\n"
855                     "    %" PRId64 " leaked clusters\n"
856                     "    %" PRId64 " corruptions\n\n"
857                     "Double checking the fixed image now...\n",
858                     check->leaks_fixed,
859                     check->corruptions_fixed);
860         }
861 
862         qapi_free_ImageCheck(check);
863         check = g_new0(ImageCheck, 1);
864         ret = collect_image_check(bs, check, filename, fmt, 0);
865 
866         check->leaks_fixed          = leaks_fixed;
867         check->has_leaks_fixed      = has_leaks_fixed;
868         check->corruptions_fixed    = corruptions_fixed;
869         check->has_corruptions_fixed = has_corruptions_fixed;
870     }
871 
872     if (!ret) {
873         switch (output_format) {
874         case OFORMAT_HUMAN:
875             dump_human_image_check(check, quiet);
876             break;
877         case OFORMAT_JSON:
878             dump_json_image_check(check, quiet);
879             break;
880         }
881     }
882 
883     if (ret || check->check_errors) {
884         if (ret) {
885             error_report("Check failed: %s", strerror(-ret));
886         } else {
887             error_report("Check failed");
888         }
889         ret = 1;
890         goto fail;
891     }
892 
893     if (check->corruptions) {
894         ret = 2;
895     } else if (check->leaks) {
896         ret = 3;
897     } else {
898         ret = 0;
899     }
900 
901 fail:
902     qapi_free_ImageCheck(check);
903     blk_unref(blk);
904     return ret;
905 }
906 
/*
 * Opaque context passed to common_block_job_cb() for block jobs started
 * by qemu-img.
 */
typedef struct CommonBlockJobCBInfo {
    /* Node the job operates on; img_commit() stores the top image here */
    BlockDriverState *bs;
    /* Destination into which the callback records a job failure */
    Error **errp;
} CommonBlockJobCBInfo;
911 
912 static void common_block_job_cb(void *opaque, int ret)
913 {
914     CommonBlockJobCBInfo *cbi = opaque;
915 
916     if (ret < 0) {
917         error_setg_errno(cbi->errp, -ret, "Block job failed");
918     }
919 }
920 
921 static void run_block_job(BlockJob *job, Error **errp)
922 {
923     AioContext *aio_context = blk_get_aio_context(job->blk);
924     int ret = 0;
925 
926     aio_context_acquire(aio_context);
927     job_ref(&job->job);
928     do {
929         float progress = 0.0f;
930         aio_poll(aio_context, true);
931         if (job->job.progress.total) {
932             progress = (float)job->job.progress.current /
933                        job->job.progress.total * 100.f;
934         }
935         qemu_progress_print(progress, 0);
936     } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
937 
938     if (!job_is_completed(&job->job)) {
939         ret = job_complete_sync(&job->job, errp);
940     } else {
941         ret = job->job.ret;
942     }
943     job_unref(&job->job);
944     aio_context_release(aio_context);
945 
946     /* publish completion progress only when success */
947     if (!ret) {
948         qemu_progress_print(100.f, 0);
949     }
950 }
951 
952 static int img_commit(int argc, char **argv)
953 {
954     int c, ret, flags;
955     const char *filename, *fmt, *cache, *base;
956     BlockBackend *blk;
957     BlockDriverState *bs, *base_bs;
958     BlockJob *job;
959     bool progress = false, quiet = false, drop = false;
960     bool writethrough;
961     Error *local_err = NULL;
962     CommonBlockJobCBInfo cbi;
963     bool image_opts = false;
964     AioContext *aio_context;
965 
966     fmt = NULL;
967     cache = BDRV_DEFAULT_CACHE;
968     base = NULL;
969     for(;;) {
970         static const struct option long_options[] = {
971             {"help", no_argument, 0, 'h'},
972             {"object", required_argument, 0, OPTION_OBJECT},
973             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
974             {0, 0, 0, 0}
975         };
976         c = getopt_long(argc, argv, ":f:ht:b:dpq",
977                         long_options, NULL);
978         if (c == -1) {
979             break;
980         }
981         switch(c) {
982         case ':':
983             missing_argument(argv[optind - 1]);
984             break;
985         case '?':
986             unrecognized_option(argv[optind - 1]);
987             break;
988         case 'h':
989             help();
990             break;
991         case 'f':
992             fmt = optarg;
993             break;
994         case 't':
995             cache = optarg;
996             break;
997         case 'b':
998             base = optarg;
999             /* -b implies -d */
1000             drop = true;
1001             break;
1002         case 'd':
1003             drop = true;
1004             break;
1005         case 'p':
1006             progress = true;
1007             break;
1008         case 'q':
1009             quiet = true;
1010             break;
1011         case OPTION_OBJECT: {
1012             QemuOpts *opts;
1013             opts = qemu_opts_parse_noisily(&qemu_object_opts,
1014                                            optarg, true);
1015             if (!opts) {
1016                 return 1;
1017             }
1018         }   break;
1019         case OPTION_IMAGE_OPTS:
1020             image_opts = true;
1021             break;
1022         }
1023     }
1024 
1025     /* Progress is not shown in Quiet mode */
1026     if (quiet) {
1027         progress = false;
1028     }
1029 
1030     if (optind != argc - 1) {
1031         error_exit("Expecting one image file name");
1032     }
1033     filename = argv[optind++];
1034 
1035     if (qemu_opts_foreach(&qemu_object_opts,
1036                           user_creatable_add_opts_foreach,
1037                           qemu_img_object_print_help, &error_fatal)) {
1038         return 1;
1039     }
1040 
1041     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
1042     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1043     if (ret < 0) {
1044         error_report("Invalid cache option: %s", cache);
1045         return 1;
1046     }
1047 
1048     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1049                    false);
1050     if (!blk) {
1051         return 1;
1052     }
1053     bs = blk_bs(blk);
1054 
1055     qemu_progress_init(progress, 1.f);
1056     qemu_progress_print(0.f, 100);
1057 
1058     if (base) {
1059         base_bs = bdrv_find_backing_image(bs, base);
1060         if (!base_bs) {
1061             error_setg(&local_err,
1062                        "Did not find '%s' in the backing chain of '%s'",
1063                        base, filename);
1064             goto done;
1065         }
1066     } else {
1067         /* This is different from QMP, which by default uses the deepest file in
1068          * the backing chain (i.e., the very base); however, the traditional
1069          * behavior of qemu-img commit is using the immediate backing file. */
1070         base_bs = backing_bs(bs);
1071         if (!base_bs) {
1072             error_setg(&local_err, "Image does not have a backing file");
1073             goto done;
1074         }
1075     }
1076 
1077     cbi = (CommonBlockJobCBInfo){
1078         .errp = &local_err,
1079         .bs   = bs,
1080     };
1081 
1082     aio_context = bdrv_get_aio_context(bs);
1083     aio_context_acquire(aio_context);
1084     commit_active_start("commit", bs, base_bs, JOB_DEFAULT, 0,
1085                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1086                         &cbi, false, &local_err);
1087     aio_context_release(aio_context);
1088     if (local_err) {
1089         goto done;
1090     }
1091 
1092     /* When the block job completes, the BlockBackend reference will point to
1093      * the old backing file. In order to avoid that the top image is already
1094      * deleted, so we can still empty it afterwards, increment the reference
1095      * counter here preemptively. */
1096     if (!drop) {
1097         bdrv_ref(bs);
1098     }
1099 
1100     job = block_job_get("commit");
1101     assert(job);
1102     run_block_job(job, &local_err);
1103     if (local_err) {
1104         goto unref_backing;
1105     }
1106 
1107     if (!drop && bs->drv->bdrv_make_empty) {
1108         ret = bs->drv->bdrv_make_empty(bs);
1109         if (ret) {
1110             error_setg_errno(&local_err, -ret, "Could not empty %s",
1111                              filename);
1112             goto unref_backing;
1113         }
1114     }
1115 
1116 unref_backing:
1117     if (!drop) {
1118         bdrv_unref(bs);
1119     }
1120 
1121 done:
1122     qemu_progress_end();
1123 
1124     blk_unref(blk);
1125 
1126     if (local_err) {
1127         error_report_err(local_err);
1128         return 1;
1129     }
1130 
1131     qprintf(quiet, "Image committed.\n");
1132     return 0;
1133 }
1134 
1135 /*
1136  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1137  * of the first sector boundary within buf where the sector contains a
1138  * non-zero byte.  This function is robust to a buffer that is not
1139  * sector-aligned.
1140  */
1141 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1142 {
1143     int64_t i;
1144     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1145 
1146     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1147         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1148             return i;
1149         }
1150     }
1151     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1152         return i;
1153     }
1154     return -1;
1155 }
1156 
1157 /*
1158  * Returns true iff the first sector pointed to by 'buf' contains at least
1159  * a non-NUL byte.
1160  *
1161  * 'pnum' is set to the number of sectors (including and immediately following
1162  * the first one) that are known to be in the same allocated/unallocated state.
1163  * The function will try to align the end offset to alignment boundaries so
1164  * that the request will at least end aligned and consequtive requests will
1165  * also start at an aligned offset.
1166  */
1167 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1168                                 int64_t sector_num, int alignment)
1169 {
1170     bool is_zero;
1171     int i, tail;
1172 
1173     if (n <= 0) {
1174         *pnum = 0;
1175         return 0;
1176     }
1177     is_zero = buffer_is_zero(buf, 512);
1178     for(i = 1; i < n; i++) {
1179         buf += 512;
1180         if (is_zero != buffer_is_zero(buf, 512)) {
1181             break;
1182         }
1183     }
1184 
1185     tail = (sector_num + i) & (alignment - 1);
1186     if (tail) {
1187         if (is_zero && i <= tail) {
1188             /* treat unallocated areas which only consist
1189              * of a small tail as allocated. */
1190             is_zero = false;
1191         }
1192         if (!is_zero) {
1193             /* align up end offset of allocated areas. */
1194             i += alignment - tail;
1195             i = MIN(i, n);
1196         } else {
1197             /* align down end offset of zero areas. */
1198             i -= tail;
1199         }
1200     }
1201     *pnum = i;
1202     return !is_zero;
1203 }
1204 
1205 /*
1206  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1207  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1208  * breaking up write requests for only small sparse areas.
1209  */
1210 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1211     int min, int64_t sector_num, int alignment)
1212 {
1213     int ret;
1214     int num_checked, num_used;
1215 
1216     if (n < min) {
1217         min = n;
1218     }
1219 
1220     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1221     if (!ret) {
1222         return ret;
1223     }
1224 
1225     num_used = *pnum;
1226     buf += BDRV_SECTOR_SIZE * *pnum;
1227     n -= *pnum;
1228     sector_num += *pnum;
1229     num_checked = num_used;
1230 
1231     while (n > 0) {
1232         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1233 
1234         buf += BDRV_SECTOR_SIZE * *pnum;
1235         n -= *pnum;
1236         sector_num += *pnum;
1237         num_checked += *pnum;
1238         if (ret) {
1239             num_used = num_checked;
1240         } else if (*pnum >= min) {
1241             break;
1242         }
1243     }
1244 
1245     *pnum = num_used;
1246     return 1;
1247 }
1248 
1249 /*
1250  * Compares two buffers sector by sector. Returns 0 if the first
1251  * sector of each buffer matches, non-zero otherwise.
1252  *
1253  * pnum is set to the sector-aligned size of the buffer prefix that
1254  * has the same matching status as the first sector.
1255  */
1256 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1257                            int64_t bytes, int64_t *pnum)
1258 {
1259     bool res;
1260     int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
1261 
1262     assert(bytes > 0);
1263 
1264     res = !!memcmp(buf1, buf2, i);
1265     while (i < bytes) {
1266         int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
1267 
1268         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1269             break;
1270         }
1271         i += len;
1272     }
1273 
1274     *pnum = i;
1275     return res;
1276 }
1277 
/*
 * Size in bytes of an I/O bounce buffer; img_compare() allocates its two
 * buffers with this size and never reads more than this in one request.
 */
#define IO_BUF_SIZE (2 * MiB)
1279 
1280 /*
1281  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1282  *
1283  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1284  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1285  * failure), and 4 on error (the exit status for read errors), after emitting
1286  * an error message.
1287  *
1288  * @param blk:  BlockBackend for the image
1289  * @param offset: Starting offset to check
1290  * @param bytes: Number of bytes to check
1291  * @param filename: Name of disk file we are checking (logging purpose)
1292  * @param buffer: Allocated buffer for storing read data
1293  * @param quiet: Flag for quiet mode
1294  */
1295 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1296                                int64_t bytes, const char *filename,
1297                                uint8_t *buffer, bool quiet)
1298 {
1299     int ret = 0;
1300     int64_t idx;
1301 
1302     ret = blk_pread(blk, offset, buffer, bytes);
1303     if (ret < 0) {
1304         error_report("Error while reading offset %" PRId64 " of %s: %s",
1305                      offset, filename, strerror(-ret));
1306         return 4;
1307     }
1308     idx = find_nonzero(buffer, bytes);
1309     if (idx >= 0) {
1310         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1311                 offset + idx);
1312         return 1;
1313     }
1314 
1315     return 0;
1316 }
1317 
1318 /*
1319  * Compares two images. Exit codes:
1320  *
1321  * 0 - Images are identical
1322  * 1 - Images differ
1323  * >1 - Error occurred
1324  */
1325 static int img_compare(int argc, char **argv)
1326 {
1327     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1328     BlockBackend *blk1, *blk2;
1329     BlockDriverState *bs1, *bs2;
1330     int64_t total_size1, total_size2;
1331     uint8_t *buf1 = NULL, *buf2 = NULL;
1332     int64_t pnum1, pnum2;
1333     int allocated1, allocated2;
1334     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1335     bool progress = false, quiet = false, strict = false;
1336     int flags;
1337     bool writethrough;
1338     int64_t total_size;
1339     int64_t offset = 0;
1340     int64_t chunk;
1341     int c;
1342     uint64_t progress_base;
1343     bool image_opts = false;
1344     bool force_share = false;
1345 
1346     cache = BDRV_DEFAULT_CACHE;
1347     for (;;) {
1348         static const struct option long_options[] = {
1349             {"help", no_argument, 0, 'h'},
1350             {"object", required_argument, 0, OPTION_OBJECT},
1351             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1352             {"force-share", no_argument, 0, 'U'},
1353             {0, 0, 0, 0}
1354         };
1355         c = getopt_long(argc, argv, ":hf:F:T:pqsU",
1356                         long_options, NULL);
1357         if (c == -1) {
1358             break;
1359         }
1360         switch (c) {
1361         case ':':
1362             missing_argument(argv[optind - 1]);
1363             break;
1364         case '?':
1365             unrecognized_option(argv[optind - 1]);
1366             break;
1367         case 'h':
1368             help();
1369             break;
1370         case 'f':
1371             fmt1 = optarg;
1372             break;
1373         case 'F':
1374             fmt2 = optarg;
1375             break;
1376         case 'T':
1377             cache = optarg;
1378             break;
1379         case 'p':
1380             progress = true;
1381             break;
1382         case 'q':
1383             quiet = true;
1384             break;
1385         case 's':
1386             strict = true;
1387             break;
1388         case 'U':
1389             force_share = true;
1390             break;
1391         case OPTION_OBJECT: {
1392             QemuOpts *opts;
1393             opts = qemu_opts_parse_noisily(&qemu_object_opts,
1394                                            optarg, true);
1395             if (!opts) {
1396                 ret = 2;
1397                 goto out4;
1398             }
1399         }   break;
1400         case OPTION_IMAGE_OPTS:
1401             image_opts = true;
1402             break;
1403         }
1404     }
1405 
1406     /* Progress is not shown in Quiet mode */
1407     if (quiet) {
1408         progress = false;
1409     }
1410 
1411 
1412     if (optind != argc - 2) {
1413         error_exit("Expecting two image file names");
1414     }
1415     filename1 = argv[optind++];
1416     filename2 = argv[optind++];
1417 
1418     if (qemu_opts_foreach(&qemu_object_opts,
1419                           user_creatable_add_opts_foreach,
1420                           qemu_img_object_print_help, &error_fatal)) {
1421         ret = 2;
1422         goto out4;
1423     }
1424 
1425     /* Initialize before goto out */
1426     qemu_progress_init(progress, 2.0);
1427 
1428     flags = 0;
1429     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1430     if (ret < 0) {
1431         error_report("Invalid source cache option: %s", cache);
1432         ret = 2;
1433         goto out3;
1434     }
1435 
1436     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
1437                     force_share);
1438     if (!blk1) {
1439         ret = 2;
1440         goto out3;
1441     }
1442 
1443     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
1444                     force_share);
1445     if (!blk2) {
1446         ret = 2;
1447         goto out2;
1448     }
1449     bs1 = blk_bs(blk1);
1450     bs2 = blk_bs(blk2);
1451 
1452     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1453     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1454     total_size1 = blk_getlength(blk1);
1455     if (total_size1 < 0) {
1456         error_report("Can't get size of %s: %s",
1457                      filename1, strerror(-total_size1));
1458         ret = 4;
1459         goto out;
1460     }
1461     total_size2 = blk_getlength(blk2);
1462     if (total_size2 < 0) {
1463         error_report("Can't get size of %s: %s",
1464                      filename2, strerror(-total_size2));
1465         ret = 4;
1466         goto out;
1467     }
1468     total_size = MIN(total_size1, total_size2);
1469     progress_base = MAX(total_size1, total_size2);
1470 
1471     qemu_progress_print(0, 100);
1472 
1473     if (strict && total_size1 != total_size2) {
1474         ret = 1;
1475         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1476         goto out;
1477     }
1478 
1479     while (offset < total_size) {
1480         int status1, status2;
1481 
1482         status1 = bdrv_block_status_above(bs1, NULL, offset,
1483                                           total_size1 - offset, &pnum1, NULL,
1484                                           NULL);
1485         if (status1 < 0) {
1486             ret = 3;
1487             error_report("Sector allocation test failed for %s", filename1);
1488             goto out;
1489         }
1490         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1491 
1492         status2 = bdrv_block_status_above(bs2, NULL, offset,
1493                                           total_size2 - offset, &pnum2, NULL,
1494                                           NULL);
1495         if (status2 < 0) {
1496             ret = 3;
1497             error_report("Sector allocation test failed for %s", filename2);
1498             goto out;
1499         }
1500         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1501 
1502         assert(pnum1 && pnum2);
1503         chunk = MIN(pnum1, pnum2);
1504 
1505         if (strict) {
1506             if (status1 != status2) {
1507                 ret = 1;
1508                 qprintf(quiet, "Strict mode: Offset %" PRId64
1509                         " block status mismatch!\n", offset);
1510                 goto out;
1511             }
1512         }
1513         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1514             /* nothing to do */
1515         } else if (allocated1 == allocated2) {
1516             if (allocated1) {
1517                 int64_t pnum;
1518 
1519                 chunk = MIN(chunk, IO_BUF_SIZE);
1520                 ret = blk_pread(blk1, offset, buf1, chunk);
1521                 if (ret < 0) {
1522                     error_report("Error while reading offset %" PRId64
1523                                  " of %s: %s",
1524                                  offset, filename1, strerror(-ret));
1525                     ret = 4;
1526                     goto out;
1527                 }
1528                 ret = blk_pread(blk2, offset, buf2, chunk);
1529                 if (ret < 0) {
1530                     error_report("Error while reading offset %" PRId64
1531                                  " of %s: %s",
1532                                  offset, filename2, strerror(-ret));
1533                     ret = 4;
1534                     goto out;
1535                 }
1536                 ret = compare_buffers(buf1, buf2, chunk, &pnum);
1537                 if (ret || pnum != chunk) {
1538                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1539                             offset + (ret ? 0 : pnum));
1540                     ret = 1;
1541                     goto out;
1542                 }
1543             }
1544         } else {
1545             chunk = MIN(chunk, IO_BUF_SIZE);
1546             if (allocated1) {
1547                 ret = check_empty_sectors(blk1, offset, chunk,
1548                                           filename1, buf1, quiet);
1549             } else {
1550                 ret = check_empty_sectors(blk2, offset, chunk,
1551                                           filename2, buf1, quiet);
1552             }
1553             if (ret) {
1554                 goto out;
1555             }
1556         }
1557         offset += chunk;
1558         qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1559     }
1560 
1561     if (total_size1 != total_size2) {
1562         BlockBackend *blk_over;
1563         const char *filename_over;
1564 
1565         qprintf(quiet, "Warning: Image size mismatch!\n");
1566         if (total_size1 > total_size2) {
1567             blk_over = blk1;
1568             filename_over = filename1;
1569         } else {
1570             blk_over = blk2;
1571             filename_over = filename2;
1572         }
1573 
1574         while (offset < progress_base) {
1575             ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
1576                                           progress_base - offset, &chunk,
1577                                           NULL, NULL);
1578             if (ret < 0) {
1579                 ret = 3;
1580                 error_report("Sector allocation test failed for %s",
1581                              filename_over);
1582                 goto out;
1583 
1584             }
1585             if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
1586                 chunk = MIN(chunk, IO_BUF_SIZE);
1587                 ret = check_empty_sectors(blk_over, offset, chunk,
1588                                           filename_over, buf1, quiet);
1589                 if (ret) {
1590                     goto out;
1591                 }
1592             }
1593             offset += chunk;
1594             qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1595         }
1596     }
1597 
1598     qprintf(quiet, "Images are identical.\n");
1599     ret = 0;
1600 
1601 out:
1602     qemu_vfree(buf1);
1603     qemu_vfree(buf2);
1604     blk_unref(blk2);
1605 out2:
1606     blk_unref(blk1);
1607 out3:
1608     qemu_progress_end();
1609 out4:
1610     return ret;
1611 }
1612 
/*
 * How 'qemu-img convert' treats a range of source sectors, as decided by
 * convert_iteration_sectors().
 */
enum ImgConvertBlockStatus {
    /* Data (or status that cannot be skipped): must be read and written */
    BLK_DATA,
    /* Source reports the range as reading zeroes */
    BLK_ZERO,
    /*
     * Range is left unallocated in the target so that the target's backing
     * file stays visible at that offset
     */
    BLK_BACKING_FILE,
};
1618 
/* Capacity of the per-coroutine arrays (co[], wait_sector_num[]) in
 * ImgConvertState */
#define MAX_COROUTINES 16
1620 
/*
 * State of one 'qemu-img convert' operation, shared by the convert_*()
 * helpers.
 */
typedef struct ImgConvertState {
    /* Source images; src_sectors[i] is the length in sectors of src[i], and
     * the sources are addressed as one concatenated range of sectors */
    BlockBackend **src;
    int64_t *src_sectors;
    int src_num;                /* number of entries in src/src_sectors */
    int64_t total_sectors;      /* exclusive upper bound for sector numbers */
    int64_t allocated_sectors;
    int64_t allocated_done;
    int64_t sector_num;
    int64_t wr_offs;
    /* Classification cached by convert_iteration_sectors(); it is valid up
     * to (but excluding) sector_next_status */
    enum ImgConvertBlockStatus status;
    int64_t sector_next_status;
    BlockBackend *target;       /* image that is written to */
    bool has_zero_init;         /* if set, zero ranges need not be written */
    bool compressed;            /* write compressed, in whole clusters */
    /* Whether unallocated ranges past the end of the target's backing file
     * can be left unallocated instead of being zeroed */
    bool unallocated_blocks_are_zero;
    bool target_is_new;
    bool target_has_backing;    /* target has a backing file */
    int64_t target_backing_sectors; /* negative if unknown */
    bool wr_in_order;
    bool copy_range;
    bool salvage;               /* carry on after source I/O errors */
    bool quiet;                 /* suppress warnings printed while salvaging */
    /* Passed to is_allocated_sectors_min() as 'min'; 0 means that all data
     * is written without looking for zero sectors */
    int min_sparse;
    int alignment;              /* passed to is_allocated_sectors_min() */
    size_t cluster_sectors;     /* granularity of compressed writes */
    size_t buf_sectors;         /* maximum size of one data request */
    long num_coroutines;
    int running_coroutines;
    Coroutine *co[MAX_COROUTINES];
    int64_t wait_sector_num[MAX_COROUTINES];
    CoMutex lock;
    int ret;
} ImgConvertState;
1654 
1655 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1656                                 int *src_cur, int64_t *src_cur_offset)
1657 {
1658     *src_cur = 0;
1659     *src_cur_offset = 0;
1660     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1661         *src_cur_offset += s->src_sectors[*src_cur];
1662         (*src_cur)++;
1663         assert(*src_cur < s->src_num);
1664     }
1665 }
1666 
1667 static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1668 {
1669     int64_t src_cur_offset;
1670     int ret, n, src_cur;
1671     bool post_backing_zero = false;
1672 
1673     convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1674 
1675     assert(s->total_sectors > sector_num);
1676     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1677 
1678     if (s->target_backing_sectors >= 0) {
1679         if (sector_num >= s->target_backing_sectors) {
1680             post_backing_zero = s->unallocated_blocks_are_zero;
1681         } else if (sector_num + n > s->target_backing_sectors) {
1682             /* Split requests around target_backing_sectors (because
1683              * starting from there, zeros are handled differently) */
1684             n = s->target_backing_sectors - sector_num;
1685         }
1686     }
1687 
1688     if (s->sector_next_status <= sector_num) {
1689         uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
1690         int64_t count;
1691 
1692         do {
1693             count = n * BDRV_SECTOR_SIZE;
1694 
1695             if (s->target_has_backing) {
1696                 ret = bdrv_block_status(blk_bs(s->src[src_cur]), offset,
1697                                         count, &count, NULL, NULL);
1698             } else {
1699                 ret = bdrv_block_status_above(blk_bs(s->src[src_cur]), NULL,
1700                                               offset, count, &count, NULL,
1701                                               NULL);
1702             }
1703 
1704             if (ret < 0) {
1705                 if (s->salvage) {
1706                     if (n == 1) {
1707                         if (!s->quiet) {
1708                             warn_report("error while reading block status at "
1709                                         "offset %" PRIu64 ": %s", offset,
1710                                         strerror(-ret));
1711                         }
1712                         /* Just try to read the data, then */
1713                         ret = BDRV_BLOCK_DATA;
1714                         count = BDRV_SECTOR_SIZE;
1715                     } else {
1716                         /* Retry on a shorter range */
1717                         n = DIV_ROUND_UP(n, 4);
1718                     }
1719                 } else {
1720                     error_report("error while reading block status at offset "
1721                                  "%" PRIu64 ": %s", offset, strerror(-ret));
1722                     return ret;
1723                 }
1724             }
1725         } while (ret < 0);
1726 
1727         n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
1728 
1729         if (ret & BDRV_BLOCK_ZERO) {
1730             s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
1731         } else if (ret & BDRV_BLOCK_DATA) {
1732             s->status = BLK_DATA;
1733         } else {
1734             s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
1735         }
1736 
1737         s->sector_next_status = sector_num + n;
1738     }
1739 
1740     n = MIN(n, s->sector_next_status - sector_num);
1741     if (s->status == BLK_DATA) {
1742         n = MIN(n, s->buf_sectors);
1743     }
1744 
1745     /* We need to write complete clusters for compressed images, so if an
1746      * unallocated area is shorter than that, we must consider the whole
1747      * cluster allocated. */
1748     if (s->compressed) {
1749         if (n < s->cluster_sectors) {
1750             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1751             s->status = BLK_DATA;
1752         } else {
1753             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1754         }
1755     }
1756 
1757     return n;
1758 }
1759 
1760 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1761                                         int nb_sectors, uint8_t *buf)
1762 {
1763     uint64_t single_read_until = 0;
1764     int n, ret;
1765 
1766     assert(nb_sectors <= s->buf_sectors);
1767     while (nb_sectors > 0) {
1768         BlockBackend *blk;
1769         int src_cur;
1770         int64_t bs_sectors, src_cur_offset;
1771         uint64_t offset;
1772 
1773         /* In the case of compression with multiple source files, we can get a
1774          * nb_sectors that spreads into the next part. So we must be able to
1775          * read across multiple BDSes for one convert_read() call. */
1776         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1777         blk = s->src[src_cur];
1778         bs_sectors = s->src_sectors[src_cur];
1779 
1780         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1781 
1782         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1783         if (single_read_until > offset) {
1784             n = 1;
1785         }
1786 
1787         ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1788         if (ret < 0) {
1789             if (s->salvage) {
1790                 if (n > 1) {
1791                     single_read_until = offset + (n << BDRV_SECTOR_BITS);
1792                     continue;
1793                 } else {
1794                     if (!s->quiet) {
1795                         warn_report("error while reading offset %" PRIu64
1796                                     ": %s", offset, strerror(-ret));
1797                     }
1798                     memset(buf, 0, BDRV_SECTOR_SIZE);
1799                 }
1800             } else {
1801                 return ret;
1802             }
1803         }
1804 
1805         sector_num += n;
1806         nb_sectors -= n;
1807         buf += n * BDRV_SECTOR_SIZE;
1808     }
1809 
1810     return 0;
1811 }
1812 
1813 
1814 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1815                                          int nb_sectors, uint8_t *buf,
1816                                          enum ImgConvertBlockStatus status)
1817 {
1818     int ret;
1819 
1820     while (nb_sectors > 0) {
1821         int n = nb_sectors;
1822         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1823 
1824         switch (status) {
1825         case BLK_BACKING_FILE:
1826             /* If we have a backing file, leave clusters unallocated that are
1827              * unallocated in the source image, so that the backing file is
1828              * visible at the respective offset. */
1829             assert(s->target_has_backing);
1830             break;
1831 
1832         case BLK_DATA:
1833             /* If we're told to keep the target fully allocated (-S 0) or there
1834              * is real non-zero data, we must write it. Otherwise we can treat
1835              * it as zero sectors.
1836              * Compressed clusters need to be written as a whole, so in that
1837              * case we can only save the write if the buffer is completely
1838              * zeroed. */
1839             if (!s->min_sparse ||
1840                 (!s->compressed &&
1841                  is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1842                                           sector_num, s->alignment)) ||
1843                 (s->compressed &&
1844                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1845             {
1846                 ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1847                                     n << BDRV_SECTOR_BITS, buf, flags);
1848                 if (ret < 0) {
1849                     return ret;
1850                 }
1851                 break;
1852             }
1853             /* fall-through */
1854 
1855         case BLK_ZERO:
1856             if (s->has_zero_init) {
1857                 assert(!s->target_has_backing);
1858                 break;
1859             }
1860             ret = blk_co_pwrite_zeroes(s->target,
1861                                        sector_num << BDRV_SECTOR_BITS,
1862                                        n << BDRV_SECTOR_BITS,
1863                                        BDRV_REQ_MAY_UNMAP);
1864             if (ret < 0) {
1865                 return ret;
1866             }
1867             break;
1868         }
1869 
1870         sector_num += n;
1871         nb_sectors -= n;
1872         buf += n * BDRV_SECTOR_SIZE;
1873     }
1874 
1875     return 0;
1876 }
1877 
1878 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1879                                               int nb_sectors)
1880 {
1881     int n, ret;
1882 
1883     while (nb_sectors > 0) {
1884         BlockBackend *blk;
1885         int src_cur;
1886         int64_t bs_sectors, src_cur_offset;
1887         int64_t offset;
1888 
1889         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1890         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1891         blk = s->src[src_cur];
1892         bs_sectors = s->src_sectors[src_cur];
1893 
1894         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1895 
1896         ret = blk_co_copy_range(blk, offset, s->target,
1897                                 sector_num << BDRV_SECTOR_BITS,
1898                                 n << BDRV_SECTOR_BITS, 0, 0);
1899         if (ret < 0) {
1900             return ret;
1901         }
1902 
1903         sector_num += n;
1904         nb_sectors -= n;
1905     }
1906     return 0;
1907 }
1908 
1909 static void coroutine_fn convert_co_do_copy(void *opaque)
1910 {
1911     ImgConvertState *s = opaque;
1912     uint8_t *buf = NULL;
1913     int ret, i;
1914     int index = -1;
1915 
1916     for (i = 0; i < s->num_coroutines; i++) {
1917         if (s->co[i] == qemu_coroutine_self()) {
1918             index = i;
1919             break;
1920         }
1921     }
1922     assert(index >= 0);
1923 
1924     s->running_coroutines++;
1925     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1926 
1927     while (1) {
1928         int n;
1929         int64_t sector_num;
1930         enum ImgConvertBlockStatus status;
1931         bool copy_range;
1932 
1933         qemu_co_mutex_lock(&s->lock);
1934         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1935             qemu_co_mutex_unlock(&s->lock);
1936             break;
1937         }
1938         n = convert_iteration_sectors(s, s->sector_num);
1939         if (n < 0) {
1940             qemu_co_mutex_unlock(&s->lock);
1941             s->ret = n;
1942             break;
1943         }
1944         /* save current sector and allocation status to local variables */
1945         sector_num = s->sector_num;
1946         status = s->status;
1947         if (!s->min_sparse && s->status == BLK_ZERO) {
1948             n = MIN(n, s->buf_sectors);
1949         }
1950         /* increment global sector counter so that other coroutines can
1951          * already continue reading beyond this request */
1952         s->sector_num += n;
1953         qemu_co_mutex_unlock(&s->lock);
1954 
1955         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
1956             s->allocated_done += n;
1957             qemu_progress_print(100.0 * s->allocated_done /
1958                                         s->allocated_sectors, 0);
1959         }
1960 
1961 retry:
1962         copy_range = s->copy_range && s->status == BLK_DATA;
1963         if (status == BLK_DATA && !copy_range) {
1964             ret = convert_co_read(s, sector_num, n, buf);
1965             if (ret < 0) {
1966                 error_report("error while reading at byte %lld: %s",
1967                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
1968                 s->ret = ret;
1969             }
1970         } else if (!s->min_sparse && status == BLK_ZERO) {
1971             status = BLK_DATA;
1972             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
1973         }
1974 
1975         if (s->wr_in_order) {
1976             /* keep writes in order */
1977             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
1978                 s->wait_sector_num[index] = sector_num;
1979                 qemu_coroutine_yield();
1980             }
1981             s->wait_sector_num[index] = -1;
1982         }
1983 
1984         if (s->ret == -EINPROGRESS) {
1985             if (copy_range) {
1986                 ret = convert_co_copy_range(s, sector_num, n);
1987                 if (ret) {
1988                     s->copy_range = false;
1989                     goto retry;
1990                 }
1991             } else {
1992                 ret = convert_co_write(s, sector_num, n, buf, status);
1993             }
1994             if (ret < 0) {
1995                 error_report("error while writing at byte %lld: %s",
1996                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
1997                 s->ret = ret;
1998             }
1999         }
2000 
2001         if (s->wr_in_order) {
2002             /* reenter the coroutine that might have waited
2003              * for this write to complete */
2004             s->wr_offs = sector_num + n;
2005             for (i = 0; i < s->num_coroutines; i++) {
2006                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
2007                     /*
2008                      * A -> B -> A cannot occur because A has
2009                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
2010                      * B will never enter A during this time window.
2011                      */
2012                     qemu_coroutine_enter(s->co[i]);
2013                     break;
2014                 }
2015             }
2016         }
2017     }
2018 
2019     qemu_vfree(buf);
2020     s->co[index] = NULL;
2021     s->running_coroutines--;
2022     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
2023         /* the convert job finished successfully */
2024         s->ret = 0;
2025     }
2026 }
2027 
2028 static int convert_do_copy(ImgConvertState *s)
2029 {
2030     int ret, i, n;
2031     int64_t sector_num = 0;
2032 
2033     /* Check whether we have zero initialisation or can get it efficiently */
2034     if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
2035         !s->target_has_backing) {
2036         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
2037     }
2038 
2039     if (!s->has_zero_init && !s->target_has_backing &&
2040         bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
2041     {
2042         ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
2043         if (ret == 0) {
2044             s->has_zero_init = true;
2045         }
2046     }
2047 
2048     /* Allocate buffer for copied data. For compressed images, only one cluster
2049      * can be copied at a time. */
2050     if (s->compressed) {
2051         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2052             error_report("invalid cluster size");
2053             return -EINVAL;
2054         }
2055         s->buf_sectors = s->cluster_sectors;
2056     }
2057 
2058     while (sector_num < s->total_sectors) {
2059         n = convert_iteration_sectors(s, sector_num);
2060         if (n < 0) {
2061             return n;
2062         }
2063         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2064         {
2065             s->allocated_sectors += n;
2066         }
2067         sector_num += n;
2068     }
2069 
2070     /* Do the copy */
2071     s->sector_next_status = 0;
2072     s->ret = -EINPROGRESS;
2073 
2074     qemu_co_mutex_init(&s->lock);
2075     for (i = 0; i < s->num_coroutines; i++) {
2076         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2077         s->wait_sector_num[i] = -1;
2078         qemu_coroutine_enter(s->co[i]);
2079     }
2080 
2081     while (s->running_coroutines) {
2082         main_loop_wait(false);
2083     }
2084 
2085     if (s->compressed && !s->ret) {
2086         /* signal EOF to align */
2087         ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
2088         if (ret < 0) {
2089             return ret;
2090         }
2091     }
2092 
2093     return s->ret;
2094 }
2095 
2096 #define MAX_BUF_SECTORS 32768
2097 
2098 static int img_convert(int argc, char **argv)
2099 {
2100     int c, bs_i, flags, src_flags = 0;
2101     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2102                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2103                *out_filename, *out_baseimg_param, *snapshot_name = NULL;
2104     BlockDriver *drv = NULL, *proto_drv = NULL;
2105     BlockDriverInfo bdi;
2106     BlockDriverState *out_bs;
2107     QemuOpts *opts = NULL, *sn_opts = NULL;
2108     QemuOptsList *create_opts = NULL;
2109     QDict *open_opts = NULL;
2110     char *options = NULL;
2111     Error *local_err = NULL;
2112     bool writethrough, src_writethrough, image_opts = false,
2113          skip_create = false, progress = false, tgt_image_opts = false;
2114     int64_t ret = -EINVAL;
2115     bool force_share = false;
2116     bool explict_min_sparse = false;
2117 
2118     ImgConvertState s = (ImgConvertState) {
2119         /* Need at least 4k of zeros for sparse detection */
2120         .min_sparse         = 8,
2121         .copy_range         = false,
2122         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2123         .wr_in_order        = true,
2124         .num_coroutines     = 8,
2125     };
2126 
2127     for(;;) {
2128         static const struct option long_options[] = {
2129             {"help", no_argument, 0, 'h'},
2130             {"object", required_argument, 0, OPTION_OBJECT},
2131             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2132             {"force-share", no_argument, 0, 'U'},
2133             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2134             {"salvage", no_argument, 0, OPTION_SALVAGE},
2135             {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2136             {0, 0, 0, 0}
2137         };
2138         c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU",
2139                         long_options, NULL);
2140         if (c == -1) {
2141             break;
2142         }
2143         switch(c) {
2144         case ':':
2145             missing_argument(argv[optind - 1]);
2146             break;
2147         case '?':
2148             unrecognized_option(argv[optind - 1]);
2149             break;
2150         case 'h':
2151             help();
2152             break;
2153         case 'f':
2154             fmt = optarg;
2155             break;
2156         case 'O':
2157             out_fmt = optarg;
2158             break;
2159         case 'B':
2160             out_baseimg = optarg;
2161             break;
2162         case 'C':
2163             s.copy_range = true;
2164             break;
2165         case 'c':
2166             s.compressed = true;
2167             break;
2168         case 'o':
2169             if (accumulate_options(&options, optarg) < 0) {
2170                 goto fail_getopt;
2171             }
2172             break;
2173         case 'l':
2174             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2175                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2176                                                   optarg, false);
2177                 if (!sn_opts) {
2178                     error_report("Failed in parsing snapshot param '%s'",
2179                                  optarg);
2180                     goto fail_getopt;
2181                 }
2182             } else {
2183                 snapshot_name = optarg;
2184             }
2185             break;
2186         case 'S':
2187         {
2188             int64_t sval;
2189 
2190             sval = cvtnum(optarg);
2191             if (sval < 0 || !QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2192                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2193                 error_report("Invalid buffer size for sparse output specified. "
2194                     "Valid sizes are multiples of %llu up to %llu. Select "
2195                     "0 to disable sparse detection (fully allocates output).",
2196                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2197                 goto fail_getopt;
2198             }
2199 
2200             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2201             explict_min_sparse = true;
2202             break;
2203         }
2204         case 'p':
2205             progress = true;
2206             break;
2207         case 't':
2208             cache = optarg;
2209             break;
2210         case 'T':
2211             src_cache = optarg;
2212             break;
2213         case 'q':
2214             s.quiet = true;
2215             break;
2216         case 'n':
2217             skip_create = true;
2218             break;
2219         case 'm':
2220             if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2221                 s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2222                 error_report("Invalid number of coroutines. Allowed number of"
2223                              " coroutines is between 1 and %d", MAX_COROUTINES);
2224                 goto fail_getopt;
2225             }
2226             break;
2227         case 'W':
2228             s.wr_in_order = false;
2229             break;
2230         case 'U':
2231             force_share = true;
2232             break;
2233         case OPTION_OBJECT: {
2234             QemuOpts *object_opts;
2235             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
2236                                                   optarg, true);
2237             if (!object_opts) {
2238                 goto fail_getopt;
2239             }
2240             break;
2241         }
2242         case OPTION_IMAGE_OPTS:
2243             image_opts = true;
2244             break;
2245         case OPTION_SALVAGE:
2246             s.salvage = true;
2247             break;
2248         case OPTION_TARGET_IMAGE_OPTS:
2249             tgt_image_opts = true;
2250             break;
2251         case OPTION_TARGET_IS_ZERO:
2252             /*
2253              * The user asserting that the target is blank has the
2254              * same effect as the target driver supporting zero
2255              * initialisation.
2256              */
2257             s.has_zero_init = true;
2258             break;
2259         }
2260     }
2261 
2262     if (!out_fmt && !tgt_image_opts) {
2263         out_fmt = "raw";
2264     }
2265 
2266     if (qemu_opts_foreach(&qemu_object_opts,
2267                           user_creatable_add_opts_foreach,
2268                           qemu_img_object_print_help, &error_fatal)) {
2269         goto fail_getopt;
2270     }
2271 
2272     if (s.compressed && s.copy_range) {
2273         error_report("Cannot enable copy offloading when -c is used");
2274         goto fail_getopt;
2275     }
2276 
2277     if (explict_min_sparse && s.copy_range) {
2278         error_report("Cannot enable copy offloading when -S is used");
2279         goto fail_getopt;
2280     }
2281 
2282     if (s.copy_range && s.salvage) {
2283         error_report("Cannot use copy offloading in salvaging mode");
2284         goto fail_getopt;
2285     }
2286 
2287     if (tgt_image_opts && !skip_create) {
2288         error_report("--target-image-opts requires use of -n flag");
2289         goto fail_getopt;
2290     }
2291 
2292     if (skip_create && options) {
2293         warn_report("-o has no effect when skipping image creation");
2294         warn_report("This will become an error in future QEMU versions.");
2295     }
2296 
2297     if (s.has_zero_init && !skip_create) {
2298         error_report("--target-is-zero requires use of -n flag");
2299         goto fail_getopt;
2300     }
2301 
2302     s.src_num = argc - optind - 1;
2303     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2304 
2305     if (options && has_help_option(options)) {
2306         if (out_fmt) {
2307             ret = print_block_option_help(out_filename, out_fmt);
2308             goto fail_getopt;
2309         } else {
2310             error_report("Option help requires a format be specified");
2311             goto fail_getopt;
2312         }
2313     }
2314 
2315     if (s.src_num < 1) {
2316         error_report("Must specify image file name");
2317         goto fail_getopt;
2318     }
2319 
2320 
2321     /* ret is still -EINVAL until here */
2322     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2323     if (ret < 0) {
2324         error_report("Invalid source cache option: %s", src_cache);
2325         goto fail_getopt;
2326     }
2327 
2328     /* Initialize before goto out */
2329     if (s.quiet) {
2330         progress = false;
2331     }
2332     qemu_progress_init(progress, 1.0);
2333     qemu_progress_print(0, 100);
2334 
2335     s.src = g_new0(BlockBackend *, s.src_num);
2336     s.src_sectors = g_new(int64_t, s.src_num);
2337 
2338     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2339         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2340                                fmt, src_flags, src_writethrough, s.quiet,
2341                                force_share);
2342         if (!s.src[bs_i]) {
2343             ret = -1;
2344             goto out;
2345         }
2346         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2347         if (s.src_sectors[bs_i] < 0) {
2348             error_report("Could not get size of %s: %s",
2349                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2350             ret = -1;
2351             goto out;
2352         }
2353         s.total_sectors += s.src_sectors[bs_i];
2354     }
2355 
2356     if (sn_opts) {
2357         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2358                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2359                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2360                                &local_err);
2361     } else if (snapshot_name != NULL) {
2362         if (s.src_num > 1) {
2363             error_report("No support for concatenating multiple snapshot");
2364             ret = -1;
2365             goto out;
2366         }
2367 
2368         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2369                                              &local_err);
2370     }
2371     if (local_err) {
2372         error_reportf_err(local_err, "Failed to load snapshot: ");
2373         ret = -1;
2374         goto out;
2375     }
2376 
2377     if (!skip_create) {
2378         /* Find driver and parse its options */
2379         drv = bdrv_find_format(out_fmt);
2380         if (!drv) {
2381             error_report("Unknown file format '%s'", out_fmt);
2382             ret = -1;
2383             goto out;
2384         }
2385 
2386         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2387         if (!proto_drv) {
2388             error_report_err(local_err);
2389             ret = -1;
2390             goto out;
2391         }
2392 
2393         if (!drv->create_opts) {
2394             error_report("Format driver '%s' does not support image creation",
2395                          drv->format_name);
2396             ret = -1;
2397             goto out;
2398         }
2399 
2400         if (!proto_drv->create_opts) {
2401             error_report("Protocol driver '%s' does not support image creation",
2402                          proto_drv->format_name);
2403             ret = -1;
2404             goto out;
2405         }
2406 
2407         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2408         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2409 
2410         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2411         if (options) {
2412             qemu_opts_do_parse(opts, options, NULL, &local_err);
2413             if (local_err) {
2414                 error_report_err(local_err);
2415                 ret = -1;
2416                 goto out;
2417             }
2418         }
2419 
2420         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s.total_sectors * 512,
2421                             &error_abort);
2422         ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
2423         if (ret < 0) {
2424             goto out;
2425         }
2426     }
2427 
2428     /* Get backing file name if -o backing_file was used */
2429     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2430     if (out_baseimg_param) {
2431         out_baseimg = out_baseimg_param;
2432     }
2433     s.target_has_backing = (bool) out_baseimg;
2434 
2435     if (s.has_zero_init && s.target_has_backing) {
2436         error_report("Cannot use --target-is-zero when the destination "
2437                      "image has a backing file");
2438         goto out;
2439     }
2440 
2441     if (s.src_num > 1 && out_baseimg) {
2442         error_report("Having a backing file for the target makes no sense when "
2443                      "concatenating multiple input images");
2444         ret = -1;
2445         goto out;
2446     }
2447 
2448     /* Check if compression is supported */
2449     if (s.compressed) {
2450         bool encryption =
2451             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2452         const char *encryptfmt =
2453             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2454         const char *preallocation =
2455             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2456 
2457         if (drv && !block_driver_can_compress(drv)) {
2458             error_report("Compression not supported for this file format");
2459             ret = -1;
2460             goto out;
2461         }
2462 
2463         if (encryption || encryptfmt) {
2464             error_report("Compression and encryption not supported at "
2465                          "the same time");
2466             ret = -1;
2467             goto out;
2468         }
2469 
2470         if (preallocation
2471             && strcmp(preallocation, "off"))
2472         {
2473             error_report("Compression and preallocation not supported at "
2474                          "the same time");
2475             ret = -1;
2476             goto out;
2477         }
2478     }
2479 
2480     /*
2481      * The later open call will need any decryption secrets, and
2482      * bdrv_create() will purge "opts", so extract them now before
2483      * they are lost.
2484      */
2485     if (!skip_create) {
2486         open_opts = qdict_new();
2487         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2488     }
2489 
2490     if (!skip_create) {
2491         /* Create the new image */
2492         ret = bdrv_create(drv, out_filename, opts, &local_err);
2493         if (ret < 0) {
2494             error_reportf_err(local_err, "%s: error while converting %s: ",
2495                               out_filename, out_fmt);
2496             goto out;
2497         }
2498     }
2499 
2500     s.target_is_new = !skip_create;
2501 
2502     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2503     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2504     if (ret < 0) {
2505         error_report("Invalid cache option: %s", cache);
2506         goto out;
2507     }
2508 
2509     if (skip_create) {
2510         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2511                             flags, writethrough, s.quiet, false);
2512     } else {
2513         /* TODO ultimately we should allow --target-image-opts
2514          * to be used even when -n is not given.
2515          * That has to wait for bdrv_create to be improved
2516          * to allow filenames in option syntax
2517          */
2518         s.target = img_open_file(out_filename, open_opts, out_fmt,
2519                                  flags, writethrough, s.quiet, false);
2520         open_opts = NULL; /* blk_new_open will have freed it */
2521     }
2522     if (!s.target) {
2523         ret = -1;
2524         goto out;
2525     }
2526     out_bs = blk_bs(s.target);
2527 
2528     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2529         error_report("Compression not supported for this file format");
2530         ret = -1;
2531         goto out;
2532     }
2533 
2534     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2535      * or discard_alignment of the out_bs is greater. Limit to
2536      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2537     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2538                         MAX(s.buf_sectors,
2539                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2540                                 out_bs->bl.pdiscard_alignment >>
2541                                 BDRV_SECTOR_BITS)));
2542 
2543     /* try to align the write requests to the destination to avoid unnecessary
2544      * RMW cycles. */
2545     s.alignment = MAX(pow2floor(s.min_sparse),
2546                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2547                                    BDRV_SECTOR_SIZE));
2548     assert(is_power_of_2(s.alignment));
2549 
2550     if (skip_create) {
2551         int64_t output_sectors = blk_nb_sectors(s.target);
2552         if (output_sectors < 0) {
2553             error_report("unable to get output image length: %s",
2554                          strerror(-output_sectors));
2555             ret = -1;
2556             goto out;
2557         } else if (output_sectors < s.total_sectors) {
2558             error_report("output file is smaller than input file");
2559             ret = -1;
2560             goto out;
2561         }
2562     }
2563 
2564     if (s.target_has_backing && s.target_is_new) {
2565         /* Errors are treated as "backing length unknown" (which means
2566          * s.target_backing_sectors has to be negative, which it will
2567          * be automatically).  The backing file length is used only
2568          * for optimizations, so such a case is not fatal. */
2569         s.target_backing_sectors = bdrv_nb_sectors(out_bs->backing->bs);
2570     } else {
2571         s.target_backing_sectors = -1;
2572     }
2573 
2574     ret = bdrv_get_info(out_bs, &bdi);
2575     if (ret < 0) {
2576         if (s.compressed) {
2577             error_report("could not get block driver info");
2578             goto out;
2579         }
2580     } else {
2581         s.compressed = s.compressed || bdi.needs_compressed_writes;
2582         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2583         s.unallocated_blocks_are_zero = bdi.unallocated_blocks_are_zero;
2584     }
2585 
2586     ret = convert_do_copy(&s);
2587 out:
2588     if (!ret) {
2589         qemu_progress_print(100, 0);
2590     }
2591     qemu_progress_end();
2592     qemu_opts_del(opts);
2593     qemu_opts_free(create_opts);
2594     qemu_opts_del(sn_opts);
2595     qobject_unref(open_opts);
2596     blk_unref(s.target);
2597     if (s.src) {
2598         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2599             blk_unref(s.src[bs_i]);
2600         }
2601         g_free(s.src);
2602     }
2603     g_free(s.src_sectors);
2604 fail_getopt:
2605     g_free(options);
2606 
2607     return !!ret;
2608 }
2609 
2610 
2611 static void dump_snapshots(BlockDriverState *bs)
2612 {
2613     QEMUSnapshotInfo *sn_tab, *sn;
2614     int nb_sns, i;
2615 
2616     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2617     if (nb_sns <= 0)
2618         return;
2619     printf("Snapshot list:\n");
2620     bdrv_snapshot_dump(NULL);
2621     printf("\n");
2622     for(i = 0; i < nb_sns; i++) {
2623         sn = &sn_tab[i];
2624         bdrv_snapshot_dump(sn);
2625         printf("\n");
2626     }
2627     g_free(sn_tab);
2628 }
2629 
2630 static void dump_json_image_info_list(ImageInfoList *list)
2631 {
2632     QString *str;
2633     QObject *obj;
2634     Visitor *v = qobject_output_visitor_new(&obj);
2635 
2636     visit_type_ImageInfoList(v, NULL, &list, &error_abort);
2637     visit_complete(v, &obj);
2638     str = qobject_to_json_pretty(obj);
2639     assert(str != NULL);
2640     printf("%s\n", qstring_get_str(str));
2641     qobject_unref(obj);
2642     visit_free(v);
2643     qobject_unref(str);
2644 }
2645 
2646 static void dump_json_image_info(ImageInfo *info)
2647 {
2648     QString *str;
2649     QObject *obj;
2650     Visitor *v = qobject_output_visitor_new(&obj);
2651 
2652     visit_type_ImageInfo(v, NULL, &info, &error_abort);
2653     visit_complete(v, &obj);
2654     str = qobject_to_json_pretty(obj);
2655     assert(str != NULL);
2656     printf("%s\n", qstring_get_str(str));
2657     qobject_unref(obj);
2658     visit_free(v);
2659     qobject_unref(str);
2660 }
2661 
2662 static void dump_human_image_info_list(ImageInfoList *list)
2663 {
2664     ImageInfoList *elem;
2665     bool delim = false;
2666 
2667     for (elem = list; elem; elem = elem->next) {
2668         if (delim) {
2669             printf("\n");
2670         }
2671         delim = true;
2672 
2673         bdrv_image_info_dump(elem->value);
2674     }
2675 }
2676 
2677 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2678 {
2679     return strcmp(a, b) == 0;
2680 }
2681 
2682 /**
2683  * Open an image file chain and return an ImageInfoList
2684  *
2685  * @filename: topmost image filename
2686  * @fmt: topmost image format (may be NULL to autodetect)
2687  * @chain: true  - enumerate entire backing file chain
2688  *         false - only topmost image file
2689  *
2690  * Returns a list of ImageInfo objects or NULL if there was an error opening an
2691  * image file.  If there was an error a message will have been printed to
2692  * stderr.
2693  */
2694 static ImageInfoList *collect_image_info_list(bool image_opts,
2695                                               const char *filename,
2696                                               const char *fmt,
2697                                               bool chain, bool force_share)
2698 {
2699     ImageInfoList *head = NULL;
2700     ImageInfoList **last = &head;
2701     GHashTable *filenames;
2702     Error *err = NULL;
2703 
2704     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2705 
2706     while (filename) {
2707         BlockBackend *blk;
2708         BlockDriverState *bs;
2709         ImageInfo *info;
2710         ImageInfoList *elem;
2711 
2712         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2713             error_report("Backing file '%s' creates an infinite loop.",
2714                          filename);
2715             goto err;
2716         }
2717         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2718 
2719         blk = img_open(image_opts, filename, fmt,
2720                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
2721                        force_share);
2722         if (!blk) {
2723             goto err;
2724         }
2725         bs = blk_bs(blk);
2726 
2727         bdrv_query_image_info(bs, &info, &err);
2728         if (err) {
2729             error_report_err(err);
2730             blk_unref(blk);
2731             goto err;
2732         }
2733 
2734         elem = g_new0(ImageInfoList, 1);
2735         elem->value = info;
2736         *last = elem;
2737         last = &elem->next;
2738 
2739         blk_unref(blk);
2740 
2741         /* Clear parameters that only apply to the topmost image */
2742         filename = fmt = NULL;
2743         image_opts = false;
2744 
2745         if (chain) {
2746             if (info->has_full_backing_filename) {
2747                 filename = info->full_backing_filename;
2748             } else if (info->has_backing_filename) {
2749                 error_report("Could not determine absolute backing filename,"
2750                              " but backing filename '%s' present",
2751                              info->backing_filename);
2752                 goto err;
2753             }
2754             if (info->has_backing_filename_format) {
2755                 fmt = info->backing_filename_format;
2756             }
2757         }
2758     }
2759     g_hash_table_destroy(filenames);
2760     return head;
2761 
2762 err:
2763     qapi_free_ImageInfoList(head);
2764     g_hash_table_destroy(filenames);
2765     return NULL;
2766 }
2767 
2768 static int img_info(int argc, char **argv)
2769 {
2770     int c;
2771     OutputFormat output_format = OFORMAT_HUMAN;
2772     bool chain = false;
2773     const char *filename, *fmt, *output;
2774     ImageInfoList *list;
2775     bool image_opts = false;
2776     bool force_share = false;
2777 
2778     fmt = NULL;
2779     output = NULL;
2780     for(;;) {
2781         int option_index = 0;
2782         static const struct option long_options[] = {
2783             {"help", no_argument, 0, 'h'},
2784             {"format", required_argument, 0, 'f'},
2785             {"output", required_argument, 0, OPTION_OUTPUT},
2786             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
2787             {"object", required_argument, 0, OPTION_OBJECT},
2788             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2789             {"force-share", no_argument, 0, 'U'},
2790             {0, 0, 0, 0}
2791         };
2792         c = getopt_long(argc, argv, ":f:hU",
2793                         long_options, &option_index);
2794         if (c == -1) {
2795             break;
2796         }
2797         switch(c) {
2798         case ':':
2799             missing_argument(argv[optind - 1]);
2800             break;
2801         case '?':
2802             unrecognized_option(argv[optind - 1]);
2803             break;
2804         case 'h':
2805             help();
2806             break;
2807         case 'f':
2808             fmt = optarg;
2809             break;
2810         case 'U':
2811             force_share = true;
2812             break;
2813         case OPTION_OUTPUT:
2814             output = optarg;
2815             break;
2816         case OPTION_BACKING_CHAIN:
2817             chain = true;
2818             break;
2819         case OPTION_OBJECT: {
2820             QemuOpts *opts;
2821             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2822                                            optarg, true);
2823             if (!opts) {
2824                 return 1;
2825             }
2826         }   break;
2827         case OPTION_IMAGE_OPTS:
2828             image_opts = true;
2829             break;
2830         }
2831     }
2832     if (optind != argc - 1) {
2833         error_exit("Expecting one image file name");
2834     }
2835     filename = argv[optind++];
2836 
2837     if (output && !strcmp(output, "json")) {
2838         output_format = OFORMAT_JSON;
2839     } else if (output && !strcmp(output, "human")) {
2840         output_format = OFORMAT_HUMAN;
2841     } else if (output) {
2842         error_report("--output must be used with human or json as argument.");
2843         return 1;
2844     }
2845 
2846     if (qemu_opts_foreach(&qemu_object_opts,
2847                           user_creatable_add_opts_foreach,
2848                           qemu_img_object_print_help, &error_fatal)) {
2849         return 1;
2850     }
2851 
2852     list = collect_image_info_list(image_opts, filename, fmt, chain,
2853                                    force_share);
2854     if (!list) {
2855         return 1;
2856     }
2857 
2858     switch (output_format) {
2859     case OFORMAT_HUMAN:
2860         dump_human_image_info_list(list);
2861         break;
2862     case OFORMAT_JSON:
2863         if (chain) {
2864             dump_json_image_info_list(list);
2865         } else {
2866             dump_json_image_info(list->value);
2867         }
2868         break;
2869     }
2870 
2871     qapi_free_ImageInfoList(list);
2872     return 0;
2873 }
2874 
2875 static int dump_map_entry(OutputFormat output_format, MapEntry *e,
2876                           MapEntry *next)
2877 {
2878     switch (output_format) {
2879     case OFORMAT_HUMAN:
2880         if (e->data && !e->has_offset) {
2881             error_report("File contains external, encrypted or compressed clusters.");
2882             return -1;
2883         }
2884         if (e->data && !e->zero) {
2885             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
2886                    e->start, e->length,
2887                    e->has_offset ? e->offset : 0,
2888                    e->has_filename ? e->filename : "");
2889         }
2890         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
2891          * Modify the flags here to allow more coalescing.
2892          */
2893         if (next && (!next->data || next->zero)) {
2894             next->data = false;
2895             next->zero = true;
2896         }
2897         break;
2898     case OFORMAT_JSON:
2899         printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
2900                " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
2901                (e->start == 0 ? "[" : ",\n"),
2902                e->start, e->length, e->depth,
2903                e->zero ? "true" : "false",
2904                e->data ? "true" : "false");
2905         if (e->has_offset) {
2906             printf(", \"offset\": %"PRId64"", e->offset);
2907         }
2908         putchar('}');
2909 
2910         if (!next) {
2911             printf("]\n");
2912         }
2913         break;
2914     }
2915     return 0;
2916 }
2917 
2918 static int get_block_status(BlockDriverState *bs, int64_t offset,
2919                             int64_t bytes, MapEntry *e)
2920 {
2921     int ret;
2922     int depth;
2923     BlockDriverState *file;
2924     bool has_offset;
2925     int64_t map;
2926     char *filename = NULL;
2927 
2928     /* As an optimization, we could cache the current range of unallocated
2929      * clusters in each file of the chain, and avoid querying the same
2930      * range repeatedly.
2931      */
2932 
2933     depth = 0;
2934     for (;;) {
2935         ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
2936         if (ret < 0) {
2937             return ret;
2938         }
2939         assert(bytes);
2940         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
2941             break;
2942         }
2943         bs = backing_bs(bs);
2944         if (bs == NULL) {
2945             ret = 0;
2946             break;
2947         }
2948 
2949         depth++;
2950     }
2951 
2952     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
2953 
2954     if (file && has_offset) {
2955         bdrv_refresh_filename(file);
2956         filename = file->filename;
2957     }
2958 
2959     *e = (MapEntry) {
2960         .start = offset,
2961         .length = bytes,
2962         .data = !!(ret & BDRV_BLOCK_DATA),
2963         .zero = !!(ret & BDRV_BLOCK_ZERO),
2964         .offset = map,
2965         .has_offset = has_offset,
2966         .depth = depth,
2967         .has_filename = filename,
2968         .filename = filename,
2969     };
2970 
2971     return 0;
2972 }
2973 
2974 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
2975 {
2976     if (curr->length == 0) {
2977         return false;
2978     }
2979     if (curr->zero != next->zero ||
2980         curr->data != next->data ||
2981         curr->depth != next->depth ||
2982         curr->has_filename != next->has_filename ||
2983         curr->has_offset != next->has_offset) {
2984         return false;
2985     }
2986     if (curr->has_filename && strcmp(curr->filename, next->filename)) {
2987         return false;
2988     }
2989     if (curr->has_offset && curr->offset + curr->length != next->offset) {
2990         return false;
2991     }
2992     return true;
2993 }
2994 
2995 static int img_map(int argc, char **argv)
2996 {
2997     int c;
2998     OutputFormat output_format = OFORMAT_HUMAN;
2999     BlockBackend *blk;
3000     BlockDriverState *bs;
3001     const char *filename, *fmt, *output;
3002     int64_t length;
3003     MapEntry curr = { .length = 0 }, next;
3004     int ret = 0;
3005     bool image_opts = false;
3006     bool force_share = false;
3007 
3008     fmt = NULL;
3009     output = NULL;
3010     for (;;) {
3011         int option_index = 0;
3012         static const struct option long_options[] = {
3013             {"help", no_argument, 0, 'h'},
3014             {"format", required_argument, 0, 'f'},
3015             {"output", required_argument, 0, OPTION_OUTPUT},
3016             {"object", required_argument, 0, OPTION_OBJECT},
3017             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3018             {"force-share", no_argument, 0, 'U'},
3019             {0, 0, 0, 0}
3020         };
3021         c = getopt_long(argc, argv, ":f:hU",
3022                         long_options, &option_index);
3023         if (c == -1) {
3024             break;
3025         }
3026         switch (c) {
3027         case ':':
3028             missing_argument(argv[optind - 1]);
3029             break;
3030         case '?':
3031             unrecognized_option(argv[optind - 1]);
3032             break;
3033         case 'h':
3034             help();
3035             break;
3036         case 'f':
3037             fmt = optarg;
3038             break;
3039         case 'U':
3040             force_share = true;
3041             break;
3042         case OPTION_OUTPUT:
3043             output = optarg;
3044             break;
3045         case OPTION_OBJECT: {
3046             QemuOpts *opts;
3047             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3048                                            optarg, true);
3049             if (!opts) {
3050                 return 1;
3051             }
3052         }   break;
3053         case OPTION_IMAGE_OPTS:
3054             image_opts = true;
3055             break;
3056         }
3057     }
3058     if (optind != argc - 1) {
3059         error_exit("Expecting one image file name");
3060     }
3061     filename = argv[optind];
3062 
3063     if (output && !strcmp(output, "json")) {
3064         output_format = OFORMAT_JSON;
3065     } else if (output && !strcmp(output, "human")) {
3066         output_format = OFORMAT_HUMAN;
3067     } else if (output) {
3068         error_report("--output must be used with human or json as argument.");
3069         return 1;
3070     }
3071 
3072     if (qemu_opts_foreach(&qemu_object_opts,
3073                           user_creatable_add_opts_foreach,
3074                           qemu_img_object_print_help, &error_fatal)) {
3075         return 1;
3076     }
3077 
3078     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3079     if (!blk) {
3080         return 1;
3081     }
3082     bs = blk_bs(blk);
3083 
3084     if (output_format == OFORMAT_HUMAN) {
3085         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3086     }
3087 
3088     length = blk_getlength(blk);
3089     while (curr.start + curr.length < length) {
3090         int64_t offset = curr.start + curr.length;
3091         int64_t n;
3092 
3093         /* Probe up to 1 GiB at a time.  */
3094         n = MIN(1 * GiB, length - offset);
3095         ret = get_block_status(bs, offset, n, &next);
3096 
3097         if (ret < 0) {
3098             error_report("Could not read file metadata: %s", strerror(-ret));
3099             goto out;
3100         }
3101 
3102         if (entry_mergeable(&curr, &next)) {
3103             curr.length += next.length;
3104             continue;
3105         }
3106 
3107         if (curr.length > 0) {
3108             ret = dump_map_entry(output_format, &curr, &next);
3109             if (ret < 0) {
3110                 goto out;
3111             }
3112         }
3113         curr = next;
3114     }
3115 
3116     ret = dump_map_entry(output_format, &curr, NULL);
3117 
3118 out:
3119     blk_unref(blk);
3120     return ret < 0;
3121 }
3122 
3123 #define SNAPSHOT_LIST   1
3124 #define SNAPSHOT_CREATE 2
3125 #define SNAPSHOT_APPLY  3
3126 #define SNAPSHOT_DELETE 4
3127 
3128 static int img_snapshot(int argc, char **argv)
3129 {
3130     BlockBackend *blk;
3131     BlockDriverState *bs;
3132     QEMUSnapshotInfo sn;
3133     char *filename, *snapshot_name = NULL;
3134     int c, ret = 0, bdrv_oflags;
3135     int action = 0;
3136     qemu_timeval tv;
3137     bool quiet = false;
3138     Error *err = NULL;
3139     bool image_opts = false;
3140     bool force_share = false;
3141 
3142     bdrv_oflags = BDRV_O_RDWR;
3143     /* Parse commandline parameters */
3144     for(;;) {
3145         static const struct option long_options[] = {
3146             {"help", no_argument, 0, 'h'},
3147             {"object", required_argument, 0, OPTION_OBJECT},
3148             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3149             {"force-share", no_argument, 0, 'U'},
3150             {0, 0, 0, 0}
3151         };
3152         c = getopt_long(argc, argv, ":la:c:d:hqU",
3153                         long_options, NULL);
3154         if (c == -1) {
3155             break;
3156         }
3157         switch(c) {
3158         case ':':
3159             missing_argument(argv[optind - 1]);
3160             break;
3161         case '?':
3162             unrecognized_option(argv[optind - 1]);
3163             break;
3164         case 'h':
3165             help();
3166             return 0;
3167         case 'l':
3168             if (action) {
3169                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3170                 return 0;
3171             }
3172             action = SNAPSHOT_LIST;
3173             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
3174             break;
3175         case 'a':
3176             if (action) {
3177                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3178                 return 0;
3179             }
3180             action = SNAPSHOT_APPLY;
3181             snapshot_name = optarg;
3182             break;
3183         case 'c':
3184             if (action) {
3185                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3186                 return 0;
3187             }
3188             action = SNAPSHOT_CREATE;
3189             snapshot_name = optarg;
3190             break;
3191         case 'd':
3192             if (action) {
3193                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3194                 return 0;
3195             }
3196             action = SNAPSHOT_DELETE;
3197             snapshot_name = optarg;
3198             break;
3199         case 'q':
3200             quiet = true;
3201             break;
3202         case 'U':
3203             force_share = true;
3204             break;
3205         case OPTION_OBJECT: {
3206             QemuOpts *opts;
3207             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3208                                            optarg, true);
3209             if (!opts) {
3210                 return 1;
3211             }
3212         }   break;
3213         case OPTION_IMAGE_OPTS:
3214             image_opts = true;
3215             break;
3216         }
3217     }
3218 
3219     if (optind != argc - 1) {
3220         error_exit("Expecting one image file name");
3221     }
3222     filename = argv[optind++];
3223 
3224     if (qemu_opts_foreach(&qemu_object_opts,
3225                           user_creatable_add_opts_foreach,
3226                           qemu_img_object_print_help, &error_fatal)) {
3227         return 1;
3228     }
3229 
3230     /* Open the image */
3231     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
3232                    force_share);
3233     if (!blk) {
3234         return 1;
3235     }
3236     bs = blk_bs(blk);
3237 
3238     /* Perform the requested action */
3239     switch(action) {
3240     case SNAPSHOT_LIST:
3241         dump_snapshots(bs);
3242         break;
3243 
3244     case SNAPSHOT_CREATE:
3245         memset(&sn, 0, sizeof(sn));
3246         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3247 
3248         qemu_gettimeofday(&tv);
3249         sn.date_sec = tv.tv_sec;
3250         sn.date_nsec = tv.tv_usec * 1000;
3251 
3252         ret = bdrv_snapshot_create(bs, &sn);
3253         if (ret) {
3254             error_report("Could not create snapshot '%s': %d (%s)",
3255                 snapshot_name, ret, strerror(-ret));
3256         }
3257         break;
3258 
3259     case SNAPSHOT_APPLY:
3260         ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3261         if (ret) {
3262             error_reportf_err(err, "Could not apply snapshot '%s': ",
3263                               snapshot_name);
3264         }
3265         break;
3266 
3267     case SNAPSHOT_DELETE:
3268         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3269         if (ret < 0) {
3270             error_report("Could not delete snapshot '%s': snapshot not "
3271                          "found", snapshot_name);
3272             ret = 1;
3273         } else {
3274             ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3275             if (ret < 0) {
3276                 error_reportf_err(err, "Could not delete snapshot '%s': ",
3277                                   snapshot_name);
3278                 ret = 1;
3279             }
3280         }
3281         break;
3282     }
3283 
3284     /* Cleanup */
3285     blk_unref(blk);
3286     if (ret) {
3287         return 1;
3288     }
3289     return 0;
3290 }
3291 
3292 static int img_rebase(int argc, char **argv)
3293 {
3294     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3295     uint8_t *buf_old = NULL;
3296     uint8_t *buf_new = NULL;
3297     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3298     char *filename;
3299     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3300     int c, flags, src_flags, ret;
3301     bool writethrough, src_writethrough;
3302     int unsafe = 0;
3303     bool force_share = false;
3304     int progress = 0;
3305     bool quiet = false;
3306     Error *local_err = NULL;
3307     bool image_opts = false;
3308 
3309     /* Parse commandline parameters */
3310     fmt = NULL;
3311     cache = BDRV_DEFAULT_CACHE;
3312     src_cache = BDRV_DEFAULT_CACHE;
3313     out_baseimg = NULL;
3314     out_basefmt = NULL;
3315     for(;;) {
3316         static const struct option long_options[] = {
3317             {"help", no_argument, 0, 'h'},
3318             {"object", required_argument, 0, OPTION_OBJECT},
3319             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3320             {"force-share", no_argument, 0, 'U'},
3321             {0, 0, 0, 0}
3322         };
3323         c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
3324                         long_options, NULL);
3325         if (c == -1) {
3326             break;
3327         }
3328         switch(c) {
3329         case ':':
3330             missing_argument(argv[optind - 1]);
3331             break;
3332         case '?':
3333             unrecognized_option(argv[optind - 1]);
3334             break;
3335         case 'h':
3336             help();
3337             return 0;
3338         case 'f':
3339             fmt = optarg;
3340             break;
3341         case 'F':
3342             out_basefmt = optarg;
3343             break;
3344         case 'b':
3345             out_baseimg = optarg;
3346             break;
3347         case 'u':
3348             unsafe = 1;
3349             break;
3350         case 'p':
3351             progress = 1;
3352             break;
3353         case 't':
3354             cache = optarg;
3355             break;
3356         case 'T':
3357             src_cache = optarg;
3358             break;
3359         case 'q':
3360             quiet = true;
3361             break;
3362         case OPTION_OBJECT: {
3363             QemuOpts *opts;
3364             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3365                                            optarg, true);
3366             if (!opts) {
3367                 return 1;
3368             }
3369         }   break;
3370         case OPTION_IMAGE_OPTS:
3371             image_opts = true;
3372             break;
3373         case 'U':
3374             force_share = true;
3375             break;
3376         }
3377     }
3378 
3379     if (quiet) {
3380         progress = 0;
3381     }
3382 
3383     if (optind != argc - 1) {
3384         error_exit("Expecting one image file name");
3385     }
3386     if (!unsafe && !out_baseimg) {
3387         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3388     }
3389     filename = argv[optind++];
3390 
3391     if (qemu_opts_foreach(&qemu_object_opts,
3392                           user_creatable_add_opts_foreach,
3393                           qemu_img_object_print_help, &error_fatal)) {
3394         return 1;
3395     }
3396 
3397     qemu_progress_init(progress, 2.0);
3398     qemu_progress_print(0, 100);
3399 
3400     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3401     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3402     if (ret < 0) {
3403         error_report("Invalid cache option: %s", cache);
3404         goto out;
3405     }
3406 
3407     src_flags = 0;
3408     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3409     if (ret < 0) {
3410         error_report("Invalid source cache option: %s", src_cache);
3411         goto out;
3412     }
3413 
3414     /* The source files are opened read-only, don't care about WCE */
3415     assert((src_flags & BDRV_O_RDWR) == 0);
3416     (void) src_writethrough;
3417 
3418     /*
3419      * Open the images.
3420      *
3421      * Ignore the old backing file for unsafe rebase in case we want to correct
3422      * the reference to a renamed or moved backing file.
3423      */
3424     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3425                    false);
3426     if (!blk) {
3427         ret = -1;
3428         goto out;
3429     }
3430     bs = blk_bs(blk);
3431 
3432     if (out_basefmt != NULL) {
3433         if (bdrv_find_format(out_basefmt) == NULL) {
3434             error_report("Invalid format name: '%s'", out_basefmt);
3435             ret = -1;
3436             goto out;
3437         }
3438     }
3439 
3440     /* For safe rebasing we need to compare old and new backing file */
3441     if (!unsafe) {
3442         QDict *options = NULL;
3443         BlockDriverState *base_bs = backing_bs(bs);
3444 
3445         if (base_bs) {
3446             blk_old_backing = blk_new(qemu_get_aio_context(),
3447                                       BLK_PERM_CONSISTENT_READ,
3448                                       BLK_PERM_ALL);
3449             ret = blk_insert_bs(blk_old_backing, base_bs,
3450                                 &local_err);
3451             if (ret < 0) {
3452                 error_reportf_err(local_err,
3453                                   "Could not reuse old backing file '%s': ",
3454                                   base_bs->filename);
3455                 goto out;
3456             }
3457         } else {
3458             blk_old_backing = NULL;
3459         }
3460 
3461         if (out_baseimg[0]) {
3462             const char *overlay_filename;
3463             char *out_real_path;
3464 
3465             options = qdict_new();
3466             if (out_basefmt) {
3467                 qdict_put_str(options, "driver", out_basefmt);
3468             }
3469             if (force_share) {
3470                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3471             }
3472 
3473             bdrv_refresh_filename(bs);
3474             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3475                                                      : bs->filename;
3476             out_real_path =
3477                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
3478                                                              out_baseimg,
3479                                                              &local_err);
3480             if (local_err) {
3481                 qobject_unref(options);
3482                 error_reportf_err(local_err,
3483                                   "Could not resolve backing filename: ");
3484                 ret = -1;
3485                 goto out;
3486             }
3487 
3488             /*
3489              * Find out whether we rebase an image on top of a previous image
3490              * in its chain.
3491              */
3492             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3493             if (prefix_chain_bs) {
3494                 qobject_unref(options);
3495                 g_free(out_real_path);
3496 
3497                 blk_new_backing = blk_new(qemu_get_aio_context(),
3498                                           BLK_PERM_CONSISTENT_READ,
3499                                           BLK_PERM_ALL);
3500                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3501                                     &local_err);
3502                 if (ret < 0) {
3503                     error_reportf_err(local_err,
3504                                       "Could not reuse backing file '%s': ",
3505                                       out_baseimg);
3506                     goto out;
3507                 }
3508             } else {
3509                 blk_new_backing = blk_new_open(out_real_path, NULL,
3510                                                options, src_flags, &local_err);
3511                 g_free(out_real_path);
3512                 if (!blk_new_backing) {
3513                     error_reportf_err(local_err,
3514                                       "Could not open new backing file '%s': ",
3515                                       out_baseimg);
3516                     ret = -1;
3517                     goto out;
3518                 }
3519             }
3520         }
3521     }
3522 
3523     /*
3524      * Check each unallocated cluster in the COW file. If it is unallocated,
3525      * accesses go to the backing file. We must therefore compare this cluster
3526      * in the old and new backing file, and if they differ we need to copy it
3527      * from the old backing file into the COW file.
3528      *
3529      * If qemu-img crashes during this step, no harm is done. The content of
3530      * the image is the same as the original one at any time.
3531      */
3532     if (!unsafe) {
3533         int64_t size;
3534         int64_t old_backing_size = 0;
3535         int64_t new_backing_size = 0;
3536         uint64_t offset;
3537         int64_t n;
3538         float local_progress = 0;
3539 
3540         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3541         buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3542 
3543         size = blk_getlength(blk);
3544         if (size < 0) {
3545             error_report("Could not get size of '%s': %s",
3546                          filename, strerror(-size));
3547             ret = -1;
3548             goto out;
3549         }
3550         if (blk_old_backing) {
3551             old_backing_size = blk_getlength(blk_old_backing);
3552             if (old_backing_size < 0) {
3553                 char backing_name[PATH_MAX];
3554 
3555                 bdrv_get_backing_filename(bs, backing_name,
3556                                           sizeof(backing_name));
3557                 error_report("Could not get size of '%s': %s",
3558                              backing_name, strerror(-old_backing_size));
3559                 ret = -1;
3560                 goto out;
3561             }
3562         }
3563         if (blk_new_backing) {
3564             new_backing_size = blk_getlength(blk_new_backing);
3565             if (new_backing_size < 0) {
3566                 error_report("Could not get size of '%s': %s",
3567                              out_baseimg, strerror(-new_backing_size));
3568                 ret = -1;
3569                 goto out;
3570             }
3571         }
3572 
3573         if (size != 0) {
3574             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3575         }
3576 
3577         for (offset = 0; offset < size; offset += n) {
3578             bool buf_old_is_zero = false;
3579 
3580             /* How many bytes can we handle with the next read? */
3581             n = MIN(IO_BUF_SIZE, size - offset);
3582 
3583             /* If the cluster is allocated, we don't need to take action */
3584             ret = bdrv_is_allocated(bs, offset, n, &n);
3585             if (ret < 0) {
3586                 error_report("error while reading image metadata: %s",
3587                              strerror(-ret));
3588                 goto out;
3589             }
3590             if (ret) {
3591                 continue;
3592             }
3593 
3594             if (prefix_chain_bs) {
3595                 /*
3596                  * If cluster wasn't changed since prefix_chain, we don't need
3597                  * to take action
3598                  */
3599                 ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
3600                                               false, offset, n, &n);
3601                 if (ret < 0) {
3602                     error_report("error while reading image metadata: %s",
3603                                  strerror(-ret));
3604                     goto out;
3605                 }
3606                 if (!ret) {
3607                     continue;
3608                 }
3609             }
3610 
3611             /*
3612              * Read old and new backing file and take into consideration that
3613              * backing files may be smaller than the COW image.
3614              */
3615             if (offset >= old_backing_size) {
3616                 memset(buf_old, 0, n);
3617                 buf_old_is_zero = true;
3618             } else {
3619                 if (offset + n > old_backing_size) {
3620                     n = old_backing_size - offset;
3621                 }
3622 
3623                 ret = blk_pread(blk_old_backing, offset, buf_old, n);
3624                 if (ret < 0) {
3625                     error_report("error while reading from old backing file");
3626                     goto out;
3627                 }
3628             }
3629 
3630             if (offset >= new_backing_size || !blk_new_backing) {
3631                 memset(buf_new, 0, n);
3632             } else {
3633                 if (offset + n > new_backing_size) {
3634                     n = new_backing_size - offset;
3635                 }
3636 
3637                 ret = blk_pread(blk_new_backing, offset, buf_new, n);
3638                 if (ret < 0) {
3639                     error_report("error while reading from new backing file");
3640                     goto out;
3641                 }
3642             }
3643 
3644             /* If they differ, we need to write to the COW file */
3645             uint64_t written = 0;
3646 
3647             while (written < n) {
3648                 int64_t pnum;
3649 
3650                 if (compare_buffers(buf_old + written, buf_new + written,
3651                                     n - written, &pnum))
3652                 {
3653                     if (buf_old_is_zero) {
3654                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
3655                     } else {
3656                         ret = blk_pwrite(blk, offset + written,
3657                                          buf_old + written, pnum, 0);
3658                     }
3659                     if (ret < 0) {
3660                         error_report("Error while writing to COW image: %s",
3661                             strerror(-ret));
3662                         goto out;
3663                     }
3664                 }
3665 
3666                 written += pnum;
3667             }
3668             qemu_progress_print(local_progress, 100);
3669         }
3670     }
3671 
3672     /*
3673      * Change the backing file. All clusters that are different from the old
3674      * backing file are overwritten in the COW file now, so the visible content
3675      * doesn't change when we switch the backing file.
3676      */
3677     if (out_baseimg && *out_baseimg) {
3678         ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
3679     } else {
3680         ret = bdrv_change_backing_file(bs, NULL, NULL);
3681     }
3682 
3683     if (ret == -ENOSPC) {
3684         error_report("Could not change the backing file to '%s': No "
3685                      "space left in the file header", out_baseimg);
3686     } else if (ret < 0) {
3687         error_report("Could not change the backing file to '%s': %s",
3688             out_baseimg, strerror(-ret));
3689     }
3690 
3691     qemu_progress_print(100, 0);
3692     /*
3693      * TODO At this point it is possible to check if any clusters that are
3694      * allocated in the COW file are the same in the backing file. If so, they
3695      * could be dropped from the COW file. Don't do this before switching the
3696      * backing file, in case of a crash this would lead to corruption.
3697      */
3698 out:
3699     qemu_progress_end();
3700     /* Cleanup */
3701     if (!unsafe) {
3702         blk_unref(blk_old_backing);
3703         blk_unref(blk_new_backing);
3704     }
3705     qemu_vfree(buf_old);
3706     qemu_vfree(buf_new);
3707 
3708     blk_unref(blk);
3709     if (ret) {
3710         return 1;
3711     }
3712     return 0;
3713 }
3714 
3715 static int img_resize(int argc, char **argv)
3716 {
3717     Error *err = NULL;
3718     int c, ret, relative;
3719     const char *filename, *fmt, *size;
3720     int64_t n, total_size, current_size;
3721     bool quiet = false;
3722     BlockBackend *blk = NULL;
3723     PreallocMode prealloc = PREALLOC_MODE_OFF;
3724     QemuOpts *param;
3725 
3726     static QemuOptsList resize_options = {
3727         .name = "resize_options",
3728         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3729         .desc = {
3730             {
3731                 .name = BLOCK_OPT_SIZE,
3732                 .type = QEMU_OPT_SIZE,
3733                 .help = "Virtual disk size"
3734             }, {
3735                 /* end of list */
3736             }
3737         },
3738     };
3739     bool image_opts = false;
3740     bool shrink = false;
3741 
3742     /* Remove size from argv manually so that negative numbers are not treated
3743      * as options by getopt. */
3744     if (argc < 3) {
3745         error_exit("Not enough arguments");
3746         return 1;
3747     }
3748 
3749     size = argv[--argc];
3750 
3751     /* Parse getopt arguments */
3752     fmt = NULL;
3753     for(;;) {
3754         static const struct option long_options[] = {
3755             {"help", no_argument, 0, 'h'},
3756             {"object", required_argument, 0, OPTION_OBJECT},
3757             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3758             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
3759             {"shrink", no_argument, 0, OPTION_SHRINK},
3760             {0, 0, 0, 0}
3761         };
3762         c = getopt_long(argc, argv, ":f:hq",
3763                         long_options, NULL);
3764         if (c == -1) {
3765             break;
3766         }
3767         switch(c) {
3768         case ':':
3769             missing_argument(argv[optind - 1]);
3770             break;
3771         case '?':
3772             unrecognized_option(argv[optind - 1]);
3773             break;
3774         case 'h':
3775             help();
3776             break;
3777         case 'f':
3778             fmt = optarg;
3779             break;
3780         case 'q':
3781             quiet = true;
3782             break;
3783         case OPTION_OBJECT: {
3784             QemuOpts *opts;
3785             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3786                                            optarg, true);
3787             if (!opts) {
3788                 return 1;
3789             }
3790         }   break;
3791         case OPTION_IMAGE_OPTS:
3792             image_opts = true;
3793             break;
3794         case OPTION_PREALLOCATION:
3795             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
3796                                        PREALLOC_MODE__MAX, NULL);
3797             if (prealloc == PREALLOC_MODE__MAX) {
3798                 error_report("Invalid preallocation mode '%s'", optarg);
3799                 return 1;
3800             }
3801             break;
3802         case OPTION_SHRINK:
3803             shrink = true;
3804             break;
3805         }
3806     }
3807     if (optind != argc - 1) {
3808         error_exit("Expecting image file name and size");
3809     }
3810     filename = argv[optind++];
3811 
3812     if (qemu_opts_foreach(&qemu_object_opts,
3813                           user_creatable_add_opts_foreach,
3814                           qemu_img_object_print_help, &error_fatal)) {
3815         return 1;
3816     }
3817 
3818     /* Choose grow, shrink, or absolute resize mode */
3819     switch (size[0]) {
3820     case '+':
3821         relative = 1;
3822         size++;
3823         break;
3824     case '-':
3825         relative = -1;
3826         size++;
3827         break;
3828     default:
3829         relative = 0;
3830         break;
3831     }
3832 
3833     /* Parse size */
3834     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
3835     qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
3836     if (err) {
3837         error_report_err(err);
3838         ret = -1;
3839         qemu_opts_del(param);
3840         goto out;
3841     }
3842     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
3843     qemu_opts_del(param);
3844 
3845     blk = img_open(image_opts, filename, fmt,
3846                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
3847                    false);
3848     if (!blk) {
3849         ret = -1;
3850         goto out;
3851     }
3852 
3853     current_size = blk_getlength(blk);
3854     if (current_size < 0) {
3855         error_report("Failed to inquire current image length: %s",
3856                      strerror(-current_size));
3857         ret = -1;
3858         goto out;
3859     }
3860 
3861     if (relative) {
3862         total_size = current_size + n * relative;
3863     } else {
3864         total_size = n;
3865     }
3866     if (total_size <= 0) {
3867         error_report("New image size must be positive");
3868         ret = -1;
3869         goto out;
3870     }
3871 
3872     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
3873         error_report("Preallocation can only be used for growing images");
3874         ret = -1;
3875         goto out;
3876     }
3877 
3878     if (total_size < current_size && !shrink) {
3879         warn_report("Shrinking an image will delete all data beyond the "
3880                     "shrunken image's end. Before performing such an "
3881                     "operation, make sure there is no important data there.");
3882 
3883         if (g_strcmp0(bdrv_get_format_name(blk_bs(blk)), "raw") != 0) {
3884             error_report(
3885               "Use the --shrink option to perform a shrink operation.");
3886             ret = -1;
3887             goto out;
3888         } else {
3889             warn_report("Using the --shrink option will suppress this message. "
3890                         "Note that future versions of qemu-img may refuse to "
3891                         "shrink images without this option.");
3892         }
3893     }
3894 
3895     /*
3896      * The user expects the image to have the desired size after
3897      * resizing, so pass @exact=true.  It is of no use to report
3898      * success when the image has not actually been resized.
3899      */
3900     ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
3901     if (!ret) {
3902         qprintf(quiet, "Image resized.\n");
3903     } else {
3904         error_report_err(err);
3905     }
3906 out:
3907     blk_unref(blk);
3908     if (ret) {
3909         return 1;
3910     }
3911     return 0;
3912 }
3913 
3914 static void amend_status_cb(BlockDriverState *bs,
3915                             int64_t offset, int64_t total_work_size,
3916                             void *opaque)
3917 {
3918     qemu_progress_print(100.f * offset / total_work_size, 0);
3919 }
3920 
3921 static int print_amend_option_help(const char *format)
3922 {
3923     BlockDriver *drv;
3924 
3925     /* Find driver and parse its options */
3926     drv = bdrv_find_format(format);
3927     if (!drv) {
3928         error_report("Unknown file format '%s'", format);
3929         return 1;
3930     }
3931 
3932     if (!drv->bdrv_amend_options) {
3933         error_report("Format driver '%s' does not support option amendment",
3934                      format);
3935         return 1;
3936     }
3937 
3938     /* Every driver supporting amendment must have create_opts */
3939     assert(drv->create_opts);
3940 
3941     printf("Creation options for '%s':\n", format);
3942     qemu_opts_print_help(drv->create_opts, false);
3943     printf("\nNote that not all of these options may be amendable.\n");
3944     return 0;
3945 }
3946 
3947 static int img_amend(int argc, char **argv)
3948 {
3949     Error *err = NULL;
3950     int c, ret = 0;
3951     char *options = NULL;
3952     QemuOptsList *create_opts = NULL;
3953     QemuOpts *opts = NULL;
3954     const char *fmt = NULL, *filename, *cache;
3955     int flags;
3956     bool writethrough;
3957     bool quiet = false, progress = false;
3958     BlockBackend *blk = NULL;
3959     BlockDriverState *bs = NULL;
3960     bool image_opts = false;
3961 
3962     cache = BDRV_DEFAULT_CACHE;
3963     for (;;) {
3964         static const struct option long_options[] = {
3965             {"help", no_argument, 0, 'h'},
3966             {"object", required_argument, 0, OPTION_OBJECT},
3967             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3968             {0, 0, 0, 0}
3969         };
3970         c = getopt_long(argc, argv, ":ho:f:t:pq",
3971                         long_options, NULL);
3972         if (c == -1) {
3973             break;
3974         }
3975 
3976         switch (c) {
3977         case ':':
3978             missing_argument(argv[optind - 1]);
3979             break;
3980         case '?':
3981             unrecognized_option(argv[optind - 1]);
3982             break;
3983         case 'h':
3984             help();
3985             break;
3986         case 'o':
3987             if (accumulate_options(&options, optarg) < 0) {
3988                 ret = -1;
3989                 goto out_no_progress;
3990             }
3991             break;
3992         case 'f':
3993             fmt = optarg;
3994             break;
3995         case 't':
3996             cache = optarg;
3997             break;
3998         case 'p':
3999             progress = true;
4000             break;
4001         case 'q':
4002             quiet = true;
4003             break;
4004         case OPTION_OBJECT:
4005             opts = qemu_opts_parse_noisily(&qemu_object_opts,
4006                                            optarg, true);
4007             if (!opts) {
4008                 ret = -1;
4009                 goto out_no_progress;
4010             }
4011             break;
4012         case OPTION_IMAGE_OPTS:
4013             image_opts = true;
4014             break;
4015         }
4016     }
4017 
4018     if (!options) {
4019         error_exit("Must specify options (-o)");
4020     }
4021 
4022     if (qemu_opts_foreach(&qemu_object_opts,
4023                           user_creatable_add_opts_foreach,
4024                           qemu_img_object_print_help, &error_fatal)) {
4025         ret = -1;
4026         goto out_no_progress;
4027     }
4028 
4029     if (quiet) {
4030         progress = false;
4031     }
4032     qemu_progress_init(progress, 1.0);
4033 
4034     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4035     if (fmt && has_help_option(options)) {
4036         /* If a format is explicitly specified (and possibly no filename is
4037          * given), print option help here */
4038         ret = print_amend_option_help(fmt);
4039         goto out;
4040     }
4041 
4042     if (optind != argc - 1) {
4043         error_report("Expecting one image file name");
4044         ret = -1;
4045         goto out;
4046     }
4047 
4048     flags = BDRV_O_RDWR;
4049     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4050     if (ret < 0) {
4051         error_report("Invalid cache option: %s", cache);
4052         goto out;
4053     }
4054 
4055     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4056                    false);
4057     if (!blk) {
4058         ret = -1;
4059         goto out;
4060     }
4061     bs = blk_bs(blk);
4062 
4063     fmt = bs->drv->format_name;
4064 
4065     if (has_help_option(options)) {
4066         /* If the format was auto-detected, print option help here */
4067         ret = print_amend_option_help(fmt);
4068         goto out;
4069     }
4070 
4071     if (!bs->drv->bdrv_amend_options) {
4072         error_report("Format driver '%s' does not support option amendment",
4073                      fmt);
4074         ret = -1;
4075         goto out;
4076     }
4077 
4078     /* Every driver supporting amendment must have create_opts */
4079     assert(bs->drv->create_opts);
4080 
4081     create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
4082     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4083     qemu_opts_do_parse(opts, options, NULL, &err);
4084     if (err) {
4085         error_report_err(err);
4086         ret = -1;
4087         goto out;
4088     }
4089 
4090     /* In case the driver does not call amend_status_cb() */
4091     qemu_progress_print(0.f, 0);
4092     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, &err);
4093     qemu_progress_print(100.f, 0);
4094     if (ret < 0) {
4095         error_report_err(err);
4096         goto out;
4097     }
4098 
4099 out:
4100     qemu_progress_end();
4101 
4102 out_no_progress:
4103     blk_unref(blk);
4104     qemu_opts_del(opts);
4105     qemu_opts_free(create_opts);
4106     g_free(options);
4107 
4108     if (ret) {
4109         return 1;
4110     }
4111     return 0;
4112 }
4113 
/*
 * State of one "bench" run, shared between img_bench() and the completion
 * callback bench_cb().  The first group of fields is set up once by
 * img_bench(); the second group is updated by bench_cb() as requests are
 * submitted and complete.
 */
typedef struct BenchData {
    BlockBackend *blk;      /* backend the requests are submitted to */
    uint64_t image_size;    /* request offsets wrap around modulo this */
    bool write;             /* submit writes if true, reads otherwise */
    int bufsize;            /* bytes per request buffer */
    int step;               /* added to the offset after each submission */
    int nrreq;              /* upper bound on requests in flight */
    int n;                  /* requests that have not completed yet */
    int flush_interval;     /* completions between flushes; 0 = no flushes */
    bool drain_on_flush;    /* wait for in-flight requests before a flush */
    uint8_t *buf;           /* one allocation backing all request buffers */
    QEMUIOVector *qiov;     /* array of nrreq vectors set up by img_bench() */

    int in_flight;          /* requests submitted but not yet completed */
    bool in_flush;          /* a flush completing through bench_cb() is pending */
    uint64_t offset;        /* offset of the next request to submit */
} BenchData;
4131 
/*
 * Completion callback for flushes issued without draining the queue.
 * Nothing to do on success; a failed flush terminates the program.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4139 
4140 static void bench_cb(void *opaque, int ret)
4141 {
4142     BenchData *b = opaque;
4143     BlockAIOCB *acb;
4144 
4145     if (ret < 0) {
4146         error_report("Failed request: %s", strerror(-ret));
4147         exit(EXIT_FAILURE);
4148     }
4149 
4150     if (b->in_flush) {
4151         /* Just finished a flush with drained queue: Start next requests */
4152         assert(b->in_flight == 0);
4153         b->in_flush = false;
4154     } else if (b->in_flight > 0) {
4155         int remaining = b->n - b->in_flight;
4156 
4157         b->n--;
4158         b->in_flight--;
4159 
4160         /* Time for flush? Drain queue if requested, then flush */
4161         if (b->flush_interval && remaining % b->flush_interval == 0) {
4162             if (!b->in_flight || !b->drain_on_flush) {
4163                 BlockCompletionFunc *cb;
4164 
4165                 if (b->drain_on_flush) {
4166                     b->in_flush = true;
4167                     cb = bench_cb;
4168                 } else {
4169                     cb = bench_undrained_flush_cb;
4170                 }
4171 
4172                 acb = blk_aio_flush(b->blk, cb, b);
4173                 if (!acb) {
4174                     error_report("Failed to issue flush request");
4175                     exit(EXIT_FAILURE);
4176                 }
4177             }
4178             if (b->drain_on_flush) {
4179                 return;
4180             }
4181         }
4182     }
4183 
4184     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4185         int64_t offset = b->offset;
4186         /* blk_aio_* might look for completed I/Os and kick bench_cb
4187          * again, so make sure this operation is counted by in_flight
4188          * and b->offset is ready for the next submission.
4189          */
4190         b->in_flight++;
4191         b->offset += b->step;
4192         b->offset %= b->image_size;
4193         if (b->write) {
4194             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4195         } else {
4196             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4197         }
4198         if (!acb) {
4199             error_report("Failed to issue request");
4200             exit(EXIT_FAILURE);
4201         }
4202     }
4203 }
4204 
4205 static int img_bench(int argc, char **argv)
4206 {
4207     int c, ret = 0;
4208     const char *fmt = NULL, *filename;
4209     bool quiet = false;
4210     bool image_opts = false;
4211     bool is_write = false;
4212     int count = 75000;
4213     int depth = 64;
4214     int64_t offset = 0;
4215     size_t bufsize = 4096;
4216     int pattern = 0;
4217     size_t step = 0;
4218     int flush_interval = 0;
4219     bool drain_on_flush = true;
4220     int64_t image_size;
4221     BlockBackend *blk = NULL;
4222     BenchData data = {};
4223     int flags = 0;
4224     bool writethrough = false;
4225     struct timeval t1, t2;
4226     int i;
4227     bool force_share = false;
4228     size_t buf_size;
4229 
4230     for (;;) {
4231         static const struct option long_options[] = {
4232             {"help", no_argument, 0, 'h'},
4233             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4234             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4235             {"pattern", required_argument, 0, OPTION_PATTERN},
4236             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4237             {"force-share", no_argument, 0, 'U'},
4238             {0, 0, 0, 0}
4239         };
4240         c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
4241                         NULL);
4242         if (c == -1) {
4243             break;
4244         }
4245 
4246         switch (c) {
4247         case ':':
4248             missing_argument(argv[optind - 1]);
4249             break;
4250         case '?':
4251             unrecognized_option(argv[optind - 1]);
4252             break;
4253         case 'h':
4254             help();
4255             break;
4256         case 'c':
4257         {
4258             unsigned long res;
4259 
4260             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4261                 error_report("Invalid request count specified");
4262                 return 1;
4263             }
4264             count = res;
4265             break;
4266         }
4267         case 'd':
4268         {
4269             unsigned long res;
4270 
4271             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4272                 error_report("Invalid queue depth specified");
4273                 return 1;
4274             }
4275             depth = res;
4276             break;
4277         }
4278         case 'f':
4279             fmt = optarg;
4280             break;
4281         case 'n':
4282             flags |= BDRV_O_NATIVE_AIO;
4283             break;
4284         case 'i':
4285             ret = bdrv_parse_aio(optarg, &flags);
4286             if (ret < 0) {
4287                 error_report("Invalid aio option: %s", optarg);
4288                 ret = -1;
4289                 goto out;
4290             }
4291             break;
4292         case 'o':
4293         {
4294             offset = cvtnum(optarg);
4295             if (offset < 0) {
4296                 error_report("Invalid offset specified");
4297                 return 1;
4298             }
4299             break;
4300         }
4301             break;
4302         case 'q':
4303             quiet = true;
4304             break;
4305         case 's':
4306         {
4307             int64_t sval;
4308 
4309             sval = cvtnum(optarg);
4310             if (sval < 0 || sval > INT_MAX) {
4311                 error_report("Invalid buffer size specified");
4312                 return 1;
4313             }
4314 
4315             bufsize = sval;
4316             break;
4317         }
4318         case 'S':
4319         {
4320             int64_t sval;
4321 
4322             sval = cvtnum(optarg);
4323             if (sval < 0 || sval > INT_MAX) {
4324                 error_report("Invalid step size specified");
4325                 return 1;
4326             }
4327 
4328             step = sval;
4329             break;
4330         }
4331         case 't':
4332             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4333             if (ret < 0) {
4334                 error_report("Invalid cache mode");
4335                 ret = -1;
4336                 goto out;
4337             }
4338             break;
4339         case 'w':
4340             flags |= BDRV_O_RDWR;
4341             is_write = true;
4342             break;
4343         case 'U':
4344             force_share = true;
4345             break;
4346         case OPTION_PATTERN:
4347         {
4348             unsigned long res;
4349 
4350             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4351                 error_report("Invalid pattern byte specified");
4352                 return 1;
4353             }
4354             pattern = res;
4355             break;
4356         }
4357         case OPTION_FLUSH_INTERVAL:
4358         {
4359             unsigned long res;
4360 
4361             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4362                 error_report("Invalid flush interval specified");
4363                 return 1;
4364             }
4365             flush_interval = res;
4366             break;
4367         }
4368         case OPTION_NO_DRAIN:
4369             drain_on_flush = false;
4370             break;
4371         case OPTION_IMAGE_OPTS:
4372             image_opts = true;
4373             break;
4374         }
4375     }
4376 
4377     if (optind != argc - 1) {
4378         error_exit("Expecting one image file name");
4379     }
4380     filename = argv[argc - 1];
4381 
4382     if (!is_write && flush_interval) {
4383         error_report("--flush-interval is only available in write tests");
4384         ret = -1;
4385         goto out;
4386     }
4387     if (flush_interval && flush_interval < depth) {
4388         error_report("Flush interval can't be smaller than depth");
4389         ret = -1;
4390         goto out;
4391     }
4392 
4393     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4394                    force_share);
4395     if (!blk) {
4396         ret = -1;
4397         goto out;
4398     }
4399 
4400     image_size = blk_getlength(blk);
4401     if (image_size < 0) {
4402         ret = image_size;
4403         goto out;
4404     }
4405 
4406     data = (BenchData) {
4407         .blk            = blk,
4408         .image_size     = image_size,
4409         .bufsize        = bufsize,
4410         .step           = step ?: bufsize,
4411         .nrreq          = depth,
4412         .n              = count,
4413         .offset         = offset,
4414         .write          = is_write,
4415         .flush_interval = flush_interval,
4416         .drain_on_flush = drain_on_flush,
4417     };
4418     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4419            "(starting at offset %" PRId64 ", step size %d)\n",
4420            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4421            data.offset, data.step);
4422     if (flush_interval) {
4423         printf("Sending flush every %d requests\n", flush_interval);
4424     }
4425 
4426     buf_size = data.nrreq * data.bufsize;
4427     data.buf = blk_blockalign(blk, buf_size);
4428     memset(data.buf, pattern, data.nrreq * data.bufsize);
4429 
4430     blk_register_buf(blk, data.buf, buf_size);
4431 
4432     data.qiov = g_new(QEMUIOVector, data.nrreq);
4433     for (i = 0; i < data.nrreq; i++) {
4434         qemu_iovec_init(&data.qiov[i], 1);
4435         qemu_iovec_add(&data.qiov[i],
4436                        data.buf + i * data.bufsize, data.bufsize);
4437     }
4438 
4439     gettimeofday(&t1, NULL);
4440     bench_cb(&data, 0);
4441 
4442     while (data.n > 0) {
4443         main_loop_wait(false);
4444     }
4445     gettimeofday(&t2, NULL);
4446 
4447     printf("Run completed in %3.3f seconds.\n",
4448            (t2.tv_sec - t1.tv_sec)
4449            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4450 
4451 out:
4452     if (data.buf) {
4453         blk_unregister_buf(blk, data.buf);
4454     }
4455     qemu_vfree(data.buf);
4456     blk_unref(blk);
4457 
4458     if (ret) {
4459         return 1;
4460     }
4461     return 0;
4462 }
4463 
/*
 * Bit flags, one per dd operand (bs=, count=, if=, of=, skip=).  img_dd()
 * ORs the matching flag into DdInfo.flags for every operand it parses.
 */
#define C_BS      (1 << 0)
#define C_COUNT   (1 << 1)
#define C_IF      (1 << 2)
#define C_OF      (1 << 3)
#define C_SKIP    (1 << 4)
4469 
/* Settings of a dd run that do not belong to either the input or the output */
struct DdInfo {
    unsigned int flags;     /* OR of the C_* flags of the operands parsed */
    int64_t count;          /* value of the count= operand */
};
4474 
/* Describes one side (input or output) of a dd run */
struct DdIo {
    int bsz;    /* Block size */
    char *filename;     /* copy of the if=/of= operand made with g_strdup() */
    uint8_t *buf;       /* NOTE(review): not used in the visible code; presumably the data buffer */
    int64_t offset;     /* for the input side, the value of the skip= operand */
};
4481 
/* One entry of the table that maps dd operand names to their parsers */
struct DdOpts {
    const char *name;   /* operand name, i.e. the text before '=' */
    /* Parses the operand value; returns 0 on success, non-zero on error */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    unsigned int flag;  /* C_* flag recorded once the operand was parsed */
};
4487 
4488 static int img_dd_bs(const char *arg,
4489                      struct DdIo *in, struct DdIo *out,
4490                      struct DdInfo *dd)
4491 {
4492     int64_t res;
4493 
4494     res = cvtnum(arg);
4495 
4496     if (res <= 0 || res > INT_MAX) {
4497         error_report("invalid number: '%s'", arg);
4498         return 1;
4499     }
4500     in->bsz = out->bsz = res;
4501 
4502     return 0;
4503 }
4504 
4505 static int img_dd_count(const char *arg,
4506                         struct DdIo *in, struct DdIo *out,
4507                         struct DdInfo *dd)
4508 {
4509     dd->count = cvtnum(arg);
4510 
4511     if (dd->count < 0) {
4512         error_report("invalid number: '%s'", arg);
4513         return 1;
4514     }
4515 
4516     return 0;
4517 }
4518 
4519 static int img_dd_if(const char *arg,
4520                      struct DdIo *in, struct DdIo *out,
4521                      struct DdInfo *dd)
4522 {
4523     in->filename = g_strdup(arg);
4524 
4525     return 0;
4526 }
4527 
4528 static int img_dd_of(const char *arg,
4529                      struct DdIo *in, struct DdIo *out,
4530                      struct DdInfo *dd)
4531 {
4532     out->filename = g_strdup(arg);
4533 
4534     return 0;
4535 }
4536 
4537 static int img_dd_skip(const char *arg,
4538                        struct DdIo *in, struct DdIo *out,
4539                        struct DdInfo *dd)
4540 {
4541     in->offset = cvtnum(arg);
4542 
4543     if (in->offset < 0) {
4544         error_report("invalid number: '%s'", arg);
4545         return 1;
4546     }
4547 
4548     return 0;
4549 }
4550 
4551 static int img_dd(int argc, char **argv)
4552 {
4553     int ret = 0;
4554     char *arg = NULL;
4555     char *tmp;
4556     BlockDriver *drv = NULL, *proto_drv = NULL;
4557     BlockBackend *blk1 = NULL, *blk2 = NULL;
4558     QemuOpts *opts = NULL;
4559     QemuOptsList *create_opts = NULL;
4560     Error *local_err = NULL;
4561     bool image_opts = false;
4562     int c, i;
4563     const char *out_fmt = "raw";
4564     const char *fmt = NULL;
4565     int64_t size = 0;
4566     int64_t block_count = 0, out_pos, in_pos;
4567     bool force_share = false;
4568     struct DdInfo dd = {
4569         .flags = 0,
4570         .count = 0,
4571     };
4572     struct DdIo in = {
4573         .bsz = 512, /* Block size is by default 512 bytes */
4574         .filename = NULL,
4575         .buf = NULL,
4576         .offset = 0
4577     };
4578     struct DdIo out = {
4579         .bsz = 512,
4580         .filename = NULL,
4581         .buf = NULL,
4582         .offset = 0
4583     };
4584 
4585     const struct DdOpts options[] = {
4586         { "bs", img_dd_bs, C_BS },
4587         { "count", img_dd_count, C_COUNT },
4588         { "if", img_dd_if, C_IF },
4589         { "of", img_dd_of, C_OF },
4590         { "skip", img_dd_skip, C_SKIP },
4591         { NULL, NULL, 0 }
4592     };
4593     const struct option long_options[] = {
4594         { "help", no_argument, 0, 'h'},
4595         { "object", required_argument, 0, OPTION_OBJECT},
4596         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4597         { "force-share", no_argument, 0, 'U'},
4598         { 0, 0, 0, 0 }
4599     };
4600 
4601     while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
4602         if (c == EOF) {
4603             break;
4604         }
4605         switch (c) {
4606         case 'O':
4607             out_fmt = optarg;
4608             break;
4609         case 'f':
4610             fmt = optarg;
4611             break;
4612         case ':':
4613             missing_argument(argv[optind - 1]);
4614             break;
4615         case '?':
4616             unrecognized_option(argv[optind - 1]);
4617             break;
4618         case 'h':
4619             help();
4620             break;
4621         case 'U':
4622             force_share = true;
4623             break;
4624         case OPTION_OBJECT:
4625             if (!qemu_opts_parse_noisily(&qemu_object_opts, optarg, true)) {
4626                 ret = -1;
4627                 goto out;
4628             }
4629             break;
4630         case OPTION_IMAGE_OPTS:
4631             image_opts = true;
4632             break;
4633         }
4634     }
4635 
4636     for (i = optind; i < argc; i++) {
4637         int j;
4638         arg = g_strdup(argv[i]);
4639 
4640         tmp = strchr(arg, '=');
4641         if (tmp == NULL) {
4642             error_report("unrecognized operand %s", arg);
4643             ret = -1;
4644             goto out;
4645         }
4646 
4647         *tmp++ = '\0';
4648 
4649         for (j = 0; options[j].name != NULL; j++) {
4650             if (!strcmp(arg, options[j].name)) {
4651                 break;
4652             }
4653         }
4654         if (options[j].name == NULL) {
4655             error_report("unrecognized operand %s", arg);
4656             ret = -1;
4657             goto out;
4658         }
4659 
4660         if (options[j].f(tmp, &in, &out, &dd) != 0) {
4661             ret = -1;
4662             goto out;
4663         }
4664         dd.flags |= options[j].flag;
4665         g_free(arg);
4666         arg = NULL;
4667     }
4668 
4669     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
4670         error_report("Must specify both input and output files");
4671         ret = -1;
4672         goto out;
4673     }
4674 
4675     if (qemu_opts_foreach(&qemu_object_opts,
4676                           user_creatable_add_opts_foreach,
4677                           qemu_img_object_print_help, &error_fatal)) {
4678         ret = -1;
4679         goto out;
4680     }
4681 
4682     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
4683                     force_share);
4684 
4685     if (!blk1) {
4686         ret = -1;
4687         goto out;
4688     }
4689 
4690     drv = bdrv_find_format(out_fmt);
4691     if (!drv) {
4692         error_report("Unknown file format");
4693         ret = -1;
4694         goto out;
4695     }
4696     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
4697 
4698     if (!proto_drv) {
4699         error_report_err(local_err);
4700         ret = -1;
4701         goto out;
4702     }
4703     if (!drv->create_opts) {
4704         error_report("Format driver '%s' does not support image creation",
4705                      drv->format_name);
4706         ret = -1;
4707         goto out;
4708     }
4709     if (!proto_drv->create_opts) {
4710         error_report("Protocol driver '%s' does not support image creation",
4711                      proto_drv->format_name);
4712         ret = -1;
4713         goto out;
4714     }
4715     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4716     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
4717 
4718     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4719 
4720     size = blk_getlength(blk1);
4721     if (size < 0) {
4722         error_report("Failed to get size for '%s'", in.filename);
4723         ret = -1;
4724         goto out;
4725     }
4726 
4727     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
4728         dd.count * in.bsz < size) {
4729         size = dd.count * in.bsz;
4730     }
4731 
4732     /* Overflow means the specified offset is beyond input image's size */
4733     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4734                               size < in.bsz * in.offset)) {
4735         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
4736     } else {
4737         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
4738                             size - in.bsz * in.offset, &error_abort);
4739     }
4740 
4741     ret = bdrv_create(drv, out.filename, opts, &local_err);
4742     if (ret < 0) {
4743         error_reportf_err(local_err,
4744                           "%s: error while creating output image: ",
4745                           out.filename);
4746         ret = -1;
4747         goto out;
4748     }
4749 
4750     /* TODO, we can't honour --image-opts for the target,
4751      * since it needs to be given in a format compatible
4752      * with the bdrv_create() call above which does not
4753      * support image-opts style.
4754      */
4755     blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
4756                          false, false, false);
4757 
4758     if (!blk2) {
4759         ret = -1;
4760         goto out;
4761     }
4762 
4763     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4764                               size < in.offset * in.bsz)) {
4765         /* We give a warning if the skip option is bigger than the input
4766          * size and create an empty output disk image (i.e. like dd(1)).
4767          */
4768         error_report("%s: cannot skip to specified offset", in.filename);
4769         in_pos = size;
4770     } else {
4771         in_pos = in.offset * in.bsz;
4772     }
4773 
4774     in.buf = g_new(uint8_t, in.bsz);
4775 
4776     for (out_pos = 0; in_pos < size; block_count++) {
4777         int in_ret, out_ret;
4778 
4779         if (in_pos + in.bsz > size) {
4780             in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
4781         } else {
4782             in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
4783         }
4784         if (in_ret < 0) {
4785             error_report("error while reading from input image file: %s",
4786                          strerror(-in_ret));
4787             ret = -1;
4788             goto out;
4789         }
4790         in_pos += in_ret;
4791 
4792         out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
4793 
4794         if (out_ret < 0) {
4795             error_report("error while writing to output image file: %s",
4796                          strerror(-out_ret));
4797             ret = -1;
4798             goto out;
4799         }
4800         out_pos += out_ret;
4801     }
4802 
4803 out:
4804     g_free(arg);
4805     qemu_opts_del(opts);
4806     qemu_opts_free(create_opts);
4807     blk_unref(blk1);
4808     blk_unref(blk2);
4809     g_free(in.filename);
4810     g_free(out.filename);
4811     g_free(in.buf);
4812     g_free(out.buf);
4813 
4814     if (ret) {
4815         return 1;
4816     }
4817     return 0;
4818 }
4819 
4820 static void dump_json_block_measure_info(BlockMeasureInfo *info)
4821 {
4822     QString *str;
4823     QObject *obj;
4824     Visitor *v = qobject_output_visitor_new(&obj);
4825 
4826     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
4827     visit_complete(v, &obj);
4828     str = qobject_to_json_pretty(obj);
4829     assert(str != NULL);
4830     printf("%s\n", qstring_get_str(str));
4831     qobject_unref(obj);
4832     visit_free(v);
4833     qobject_unref(str);
4834 }
4835 
4836 static int img_measure(int argc, char **argv)
4837 {
4838     static const struct option long_options[] = {
4839         {"help", no_argument, 0, 'h'},
4840         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4841         {"object", required_argument, 0, OPTION_OBJECT},
4842         {"output", required_argument, 0, OPTION_OUTPUT},
4843         {"size", required_argument, 0, OPTION_SIZE},
4844         {"force-share", no_argument, 0, 'U'},
4845         {0, 0, 0, 0}
4846     };
4847     OutputFormat output_format = OFORMAT_HUMAN;
4848     BlockBackend *in_blk = NULL;
4849     BlockDriver *drv;
4850     const char *filename = NULL;
4851     const char *fmt = NULL;
4852     const char *out_fmt = "raw";
4853     char *options = NULL;
4854     char *snapshot_name = NULL;
4855     bool force_share = false;
4856     QemuOpts *opts = NULL;
4857     QemuOpts *object_opts = NULL;
4858     QemuOpts *sn_opts = NULL;
4859     QemuOptsList *create_opts = NULL;
4860     bool image_opts = false;
4861     uint64_t img_size = UINT64_MAX;
4862     BlockMeasureInfo *info = NULL;
4863     Error *local_err = NULL;
4864     int ret = 1;
4865     int c;
4866 
4867     while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
4868                             long_options, NULL)) != -1) {
4869         switch (c) {
4870         case '?':
4871         case 'h':
4872             help();
4873             break;
4874         case 'f':
4875             fmt = optarg;
4876             break;
4877         case 'O':
4878             out_fmt = optarg;
4879             break;
4880         case 'o':
4881             if (accumulate_options(&options, optarg) < 0) {
4882                 goto out;
4883             }
4884             break;
4885         case 'l':
4886             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
4887                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
4888                                                   optarg, false);
4889                 if (!sn_opts) {
4890                     error_report("Failed in parsing snapshot param '%s'",
4891                                  optarg);
4892                     goto out;
4893                 }
4894             } else {
4895                 snapshot_name = optarg;
4896             }
4897             break;
4898         case 'U':
4899             force_share = true;
4900             break;
4901         case OPTION_OBJECT:
4902             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
4903                                                   optarg, true);
4904             if (!object_opts) {
4905                 goto out;
4906             }
4907             break;
4908         case OPTION_IMAGE_OPTS:
4909             image_opts = true;
4910             break;
4911         case OPTION_OUTPUT:
4912             if (!strcmp(optarg, "json")) {
4913                 output_format = OFORMAT_JSON;
4914             } else if (!strcmp(optarg, "human")) {
4915                 output_format = OFORMAT_HUMAN;
4916             } else {
4917                 error_report("--output must be used with human or json "
4918                              "as argument.");
4919                 goto out;
4920             }
4921             break;
4922         case OPTION_SIZE:
4923         {
4924             int64_t sval;
4925 
4926             sval = cvtnum(optarg);
4927             if (sval < 0) {
4928                 if (sval == -ERANGE) {
4929                     error_report("Image size must be less than 8 EiB!");
4930                 } else {
4931                     error_report("Invalid image size specified! You may use "
4932                                  "k, M, G, T, P or E suffixes for ");
4933                     error_report("kilobytes, megabytes, gigabytes, terabytes, "
4934                                  "petabytes and exabytes.");
4935                 }
4936                 goto out;
4937             }
4938             img_size = (uint64_t)sval;
4939         }
4940         break;
4941         }
4942     }
4943 
4944     if (qemu_opts_foreach(&qemu_object_opts,
4945                           user_creatable_add_opts_foreach,
4946                           qemu_img_object_print_help, &error_fatal)) {
4947         goto out;
4948     }
4949 
4950     if (argc - optind > 1) {
4951         error_report("At most one filename argument is allowed.");
4952         goto out;
4953     } else if (argc - optind == 1) {
4954         filename = argv[optind];
4955     }
4956 
4957     if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
4958         error_report("--image-opts, -f, and -l require a filename argument.");
4959         goto out;
4960     }
4961     if (filename && img_size != UINT64_MAX) {
4962         error_report("--size N cannot be used together with a filename.");
4963         goto out;
4964     }
4965     if (!filename && img_size == UINT64_MAX) {
4966         error_report("Either --size N or one filename must be specified.");
4967         goto out;
4968     }
4969 
4970     if (filename) {
4971         in_blk = img_open(image_opts, filename, fmt, 0,
4972                           false, false, force_share);
4973         if (!in_blk) {
4974             goto out;
4975         }
4976 
4977         if (sn_opts) {
4978             bdrv_snapshot_load_tmp(blk_bs(in_blk),
4979                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
4980                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
4981                     &local_err);
4982         } else if (snapshot_name != NULL) {
4983             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
4984                     snapshot_name, &local_err);
4985         }
4986         if (local_err) {
4987             error_reportf_err(local_err, "Failed to load snapshot: ");
4988             goto out;
4989         }
4990     }
4991 
4992     drv = bdrv_find_format(out_fmt);
4993     if (!drv) {
4994         error_report("Unknown file format '%s'", out_fmt);
4995         goto out;
4996     }
4997     if (!drv->create_opts) {
4998         error_report("Format driver '%s' does not support image creation",
4999                      drv->format_name);
5000         goto out;
5001     }
5002 
5003     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5004     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
5005     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5006     if (options) {
5007         qemu_opts_do_parse(opts, options, NULL, &local_err);
5008         if (local_err) {
5009             error_report_err(local_err);
5010             error_report("Invalid options for file format '%s'", out_fmt);
5011             goto out;
5012         }
5013     }
5014     if (img_size != UINT64_MAX) {
5015         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5016     }
5017 
5018     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
5019     if (local_err) {
5020         error_report_err(local_err);
5021         goto out;
5022     }
5023 
5024     if (output_format == OFORMAT_HUMAN) {
5025         printf("required size: %" PRIu64 "\n", info->required);
5026         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5027     } else {
5028         dump_json_block_measure_info(info);
5029     }
5030 
5031     ret = 0;
5032 
5033 out:
5034     qapi_free_BlockMeasureInfo(info);
5035     qemu_opts_del(object_opts);
5036     qemu_opts_del(opts);
5037     qemu_opts_del(sn_opts);
5038     qemu_opts_free(create_opts);
5039     g_free(options);
5040     blk_unref(in_blk);
5041     return ret;
5042 }
5043 
5044 static const img_cmd_t img_cmds[] = {
5045 #define DEF(option, callback, arg_string)        \
5046     { option, callback },
5047 #include "qemu-img-cmds.h"
5048 #undef DEF
5049     { NULL, NULL, },
5050 };
5051 
5052 int main(int argc, char **argv)
5053 {
5054     const img_cmd_t *cmd;
5055     const char *cmdname;
5056     Error *local_error = NULL;
5057     char *trace_file = NULL;
5058     int c;
5059     static const struct option long_options[] = {
5060         {"help", no_argument, 0, 'h'},
5061         {"version", no_argument, 0, 'V'},
5062         {"trace", required_argument, NULL, 'T'},
5063         {0, 0, 0, 0}
5064     };
5065 
5066 #ifdef CONFIG_POSIX
5067     signal(SIGPIPE, SIG_IGN);
5068 #endif
5069 
5070     error_init(argv[0]);
5071     module_call_init(MODULE_INIT_TRACE);
5072     qemu_init_exec_dir(argv[0]);
5073 
5074     if (qemu_init_main_loop(&local_error)) {
5075         error_report_err(local_error);
5076         exit(EXIT_FAILURE);
5077     }
5078 
5079     qcrypto_init(&error_fatal);
5080 
5081     module_call_init(MODULE_INIT_QOM);
5082     bdrv_init();
5083     if (argc < 2) {
5084         error_exit("Not enough arguments");
5085     }
5086 
5087     qemu_add_opts(&qemu_object_opts);
5088     qemu_add_opts(&qemu_source_opts);
5089     qemu_add_opts(&qemu_trace_opts);
5090 
5091     while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5092         switch (c) {
5093         case ':':
5094             missing_argument(argv[optind - 1]);
5095             return 0;
5096         case '?':
5097             unrecognized_option(argv[optind - 1]);
5098             return 0;
5099         case 'h':
5100             help();
5101             return 0;
5102         case 'V':
5103             printf(QEMU_IMG_VERSION);
5104             return 0;
5105         case 'T':
5106             g_free(trace_file);
5107             trace_file = trace_opt_parse(optarg);
5108             break;
5109         }
5110     }
5111 
5112     cmdname = argv[optind];
5113 
5114     /* reset getopt_long scanning */
5115     argc -= optind;
5116     if (argc < 1) {
5117         return 0;
5118     }
5119     argv += optind;
5120     qemu_reset_optind();
5121 
5122     if (!trace_init_backends()) {
5123         exit(1);
5124     }
5125     trace_init_file(trace_file);
5126     qemu_set_log(LOG_TRACE);
5127 
5128     /* find the command */
5129     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5130         if (!strcmp(cmdname, cmd->name)) {
5131             return cmd->handler(argc, argv);
5132         }
5133     }
5134 
5135     /* not found */
5136     error_exit("Command not found: %s", cmdname);
5137 }
5138