xref: /openbmc/qemu/qemu-img.c (revision 64547a3b)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu-common.h"
29 #include "qemu-version.h"
30 #include "qapi/error.h"
31 #include "qapi/qapi-visit-block-core.h"
32 #include "qapi/qobject-output-visitor.h"
33 #include "qapi/qmp/qjson.h"
34 #include "qapi/qmp/qdict.h"
35 #include "qapi/qmp/qstring.h"
36 #include "qemu/cutils.h"
37 #include "qemu/config-file.h"
38 #include "qemu/option.h"
39 #include "qemu/error-report.h"
40 #include "qemu/log.h"
41 #include "qemu/main-loop.h"
42 #include "qemu/module.h"
43 #include "qemu/units.h"
44 #include "qom/object_interfaces.h"
45 #include "sysemu/block-backend.h"
46 #include "block/block_int.h"
47 #include "block/blockjob.h"
48 #include "block/qapi.h"
49 #include "crypto/init.h"
50 #include "trace/control.h"
51 
52 #define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
53                           "\n" QEMU_COPYRIGHT "\n"
54 
/*
 * Associates a command name with the function that handles it.
 * The handler receives an argc/argv pair and returns an int.
 */
typedef struct img_cmd_t img_cmd_t;

struct img_cmd_t {
    const char *name;                       /* command name */
    int (*handler)(int argc, char **argv);  /* function handling the command */
};
59 
/*
 * Values used as the getopt_long() return code for options that are only
 * available in long form.  Numbering starts at 256, above the range of any
 * single-character option, and continues sequentially.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN,       /* 257 */
    OPTION_OBJECT,              /* 258 */
    OPTION_IMAGE_OPTS,          /* 259 */
    OPTION_PATTERN,             /* 260 */
    OPTION_FLUSH_INTERVAL,      /* 261 */
    OPTION_NO_DRAIN,            /* 262 */
    OPTION_TARGET_IMAGE_OPTS,   /* 263 */
    OPTION_SIZE,                /* 264 */
    OPTION_PREALLOCATION,       /* 265 */
    OPTION_SHRINK,              /* 266 */
    OPTION_SALVAGE,             /* 267 */
    OPTION_TARGET_IS_ZERO,      /* 268 */
};
75 
/* Output styles selectable with --output ("json" or "human"). */
typedef enum OutputFormat {
    OFORMAT_JSON = 0,
    OFORMAT_HUMAN = 1,
} OutputFormat;
80 
/*
 * Cache mode used when the user does not pass one explicitly.
 * qemu-img defaults to cache=writeback because data integrity is not
 * important for it.
 */
#define BDRV_DEFAULT_CACHE "writeback"
83 
/*
 * Callback handed to bdrv_iterate_format(): writes one format name to
 * stdout, preceded by a single space.  @opaque is not used.
 */
static void format_print(void *opaque, const char *name)
{
    (void)opaque;
    fprintf(stdout, " %s", name);
}
88 
89 static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
90 {
91     va_list ap;
92 
93     va_start(ap, fmt);
94     error_vreport(fmt, ap);
95     va_end(ap);
96 
97     error_printf("Try 'qemu-img --help' for more information\n");
98     exit(EXIT_FAILURE);
99 }
100 
101 static void QEMU_NORETURN missing_argument(const char *option)
102 {
103     error_exit("missing argument for option '%s'", option);
104 }
105 
106 static void QEMU_NORETURN unrecognized_option(const char *option)
107 {
108     error_exit("unrecognized option '%s'", option);
109 }
110 
111 /* Please keep in synch with qemu-img.texi */
112 static void QEMU_NORETURN help(void)
113 {
114     const char *help_msg =
115            QEMU_IMG_VERSION
116            "usage: qemu-img [standard options] command [command options]\n"
117            "QEMU disk image utility\n"
118            "\n"
119            "    '-h', '--help'       display this help and exit\n"
120            "    '-V', '--version'    output version information and exit\n"
121            "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
122            "                         specify tracing options\n"
123            "\n"
124            "Command syntax:\n"
125 #define DEF(option, callback, arg_string)        \
126            "  " arg_string "\n"
127 #include "qemu-img-cmds.h"
128 #undef DEF
129            "\n"
130            "Command parameters:\n"
131            "  'filename' is a disk image filename\n"
132            "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
133            "    manual page for a description of the object properties. The most common\n"
134            "    object type is a 'secret', which is used to supply passwords and/or\n"
135            "    encryption keys.\n"
136            "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
137            "  'cache' is the cache mode used to write the output disk image, the valid\n"
138            "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
139            "    'directsync' and 'unsafe' (default for convert)\n"
140            "  'src_cache' is the cache mode used to read input disk images, the valid\n"
141            "    options are the same as for the 'cache' option\n"
142            "  'size' is the disk image size in bytes. Optional suffixes\n"
143            "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
144            "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
145            "    supported. 'b' is ignored.\n"
146            "  'output_filename' is the destination disk image filename\n"
147            "  'output_fmt' is the destination format\n"
148            "  'options' is a comma separated list of format specific options in a\n"
149            "    name=value format. Use -o ? for an overview of the options supported by the\n"
150            "    used format\n"
151            "  'snapshot_param' is param used for internal snapshot, format\n"
152            "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
153            "    '[ID_OR_NAME]'\n"
154            "  '-c' indicates that target image must be compressed (qcow format only)\n"
155            "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
156            "       new backing file match exactly. The image doesn't need a working\n"
157            "       backing file before rebasing in this case (useful for renaming the\n"
158            "       backing file). For image creation, allow creating without attempting\n"
159            "       to open the backing file.\n"
160            "  '-h' with or without a command shows this help and lists the supported formats\n"
161            "  '-p' show progress of command (only certain commands)\n"
162            "  '-q' use Quiet mode - do not print any output (except errors)\n"
163            "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
164            "       contain only zeros for qemu-img to create a sparse image during\n"
165            "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
166            "       unallocated or zero sectors, and the destination image will always be\n"
167            "       fully allocated\n"
168            "  '--output' takes the format in which the output must be done (human or json)\n"
169            "  '-n' skips the target volume creation (useful if the volume is created\n"
170            "       prior to running qemu-img)\n"
171            "\n"
172            "Parameters to check subcommand:\n"
173            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
174            "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
175            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
176            "       hiding corruption that has already occurred.\n"
177            "\n"
178            "Parameters to convert subcommand:\n"
179            "  '-m' specifies how many coroutines work in parallel during the convert\n"
180            "       process (defaults to 8)\n"
181            "  '-W' allow to write to the target out of order rather than sequential\n"
182            "\n"
183            "Parameters to snapshot subcommand:\n"
184            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
185            "  '-a' applies a snapshot (revert disk to saved state)\n"
186            "  '-c' creates a snapshot\n"
187            "  '-d' deletes a snapshot\n"
188            "  '-l' lists all snapshots in the given image\n"
189            "\n"
190            "Parameters to compare subcommand:\n"
191            "  '-f' first image format\n"
192            "  '-F' second image format\n"
193            "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
194            "\n"
195            "Parameters to dd subcommand:\n"
196            "  'bs=BYTES' read and write up to BYTES bytes at a time "
197            "(default: 512)\n"
198            "  'count=N' copy only N input blocks\n"
199            "  'if=FILE' read from FILE\n"
200            "  'of=FILE' write to FILE\n"
201            "  'skip=N' skip N bs-sized blocks at the start of input\n";
202 
203     printf("%s\nSupported formats:", help_msg);
204     bdrv_iterate_format(format_print, NULL, false);
205     printf("\n\n" QEMU_HELP_BOTTOM "\n");
206     exit(EXIT_SUCCESS);
207 }
208 
209 static QemuOptsList qemu_object_opts = {
210     .name = "object",
211     .implied_opt_name = "qom-type",
212     .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
213     .desc = {
214         { }
215     },
216 };
217 
218 static bool qemu_img_object_print_help(const char *type, QemuOpts *opts)
219 {
220     if (user_creatable_print_help(type, opts)) {
221         exit(0);
222     }
223     return true;
224 }
225 
226 static QemuOptsList qemu_source_opts = {
227     .name = "source",
228     .implied_opt_name = "file",
229     .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
230     .desc = {
231         { }
232     },
233 };
234 
/*
 * printf() wrapper that honours quiet mode.
 *
 * @quiet: when true nothing is printed
 * @fmt:   printf-style format string, followed by its arguments
 *
 * Returns 0 when @quiet is set, otherwise whatever vprintf() returned.
 */
static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
{
    va_list ap;
    int written;

    if (quiet) {
        return 0;
    }

    va_start(ap, fmt);
    written = vprintf(fmt, ap);
    va_end(ap);

    return written;
}
246 
247 
248 static int print_block_option_help(const char *filename, const char *fmt)
249 {
250     BlockDriver *drv, *proto_drv;
251     QemuOptsList *create_opts = NULL;
252     Error *local_err = NULL;
253 
254     /* Find driver and parse its options */
255     drv = bdrv_find_format(fmt);
256     if (!drv) {
257         error_report("Unknown file format '%s'", fmt);
258         return 1;
259     }
260 
261     if (!drv->create_opts) {
262         error_report("Format driver '%s' does not support image creation", fmt);
263         return 1;
264     }
265 
266     create_opts = qemu_opts_append(create_opts, drv->create_opts);
267     if (filename) {
268         proto_drv = bdrv_find_protocol(filename, true, &local_err);
269         if (!proto_drv) {
270             error_report_err(local_err);
271             qemu_opts_free(create_opts);
272             return 1;
273         }
274         if (!proto_drv->create_opts) {
275             error_report("Protocol driver '%s' does not support image creation",
276                          proto_drv->format_name);
277             qemu_opts_free(create_opts);
278             return 1;
279         }
280         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
281     }
282 
283     if (filename) {
284         printf("Supported options:\n");
285     } else {
286         printf("Supported %s options:\n", fmt);
287     }
288     qemu_opts_print_help(create_opts, false);
289     qemu_opts_free(create_opts);
290 
291     if (!filename) {
292         printf("\n"
293                "The protocol level may support further options.\n"
294                "Specify the target filename to include those options.\n");
295     }
296 
297     return 0;
298 }
299 
300 
301 static BlockBackend *img_open_opts(const char *optstr,
302                                    QemuOpts *opts, int flags, bool writethrough,
303                                    bool quiet, bool force_share)
304 {
305     QDict *options;
306     Error *local_err = NULL;
307     BlockBackend *blk;
308     options = qemu_opts_to_qdict(opts, NULL);
309     if (force_share) {
310         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
311             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
312             error_report("--force-share/-U conflicts with image options");
313             qobject_unref(options);
314             return NULL;
315         }
316         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
317     }
318     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
319     if (!blk) {
320         error_reportf_err(local_err, "Could not open '%s': ", optstr);
321         return NULL;
322     }
323     blk_set_enable_write_cache(blk, !writethrough);
324 
325     return blk;
326 }
327 
328 static BlockBackend *img_open_file(const char *filename,
329                                    QDict *options,
330                                    const char *fmt, int flags,
331                                    bool writethrough, bool quiet,
332                                    bool force_share)
333 {
334     BlockBackend *blk;
335     Error *local_err = NULL;
336 
337     if (!options) {
338         options = qdict_new();
339     }
340     if (fmt) {
341         qdict_put_str(options, "driver", fmt);
342     }
343 
344     if (force_share) {
345         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
346     }
347     blk = blk_new_open(filename, NULL, options, flags, &local_err);
348     if (!blk) {
349         error_reportf_err(local_err, "Could not open '%s': ", filename);
350         return NULL;
351     }
352     blk_set_enable_write_cache(blk, !writethrough);
353 
354     return blk;
355 }
356 
357 
358 static int img_add_key_secrets(void *opaque,
359                                const char *name, const char *value,
360                                Error **errp)
361 {
362     QDict *options = opaque;
363 
364     if (g_str_has_suffix(name, "key-secret")) {
365         qdict_put_str(options, name, value);
366     }
367 
368     return 0;
369 }
370 
371 
372 static BlockBackend *img_open(bool image_opts,
373                               const char *filename,
374                               const char *fmt, int flags, bool writethrough,
375                               bool quiet, bool force_share)
376 {
377     BlockBackend *blk;
378     if (image_opts) {
379         QemuOpts *opts;
380         if (fmt) {
381             error_report("--image-opts and --format are mutually exclusive");
382             return NULL;
383         }
384         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
385                                        filename, true);
386         if (!opts) {
387             return NULL;
388         }
389         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
390                             force_share);
391     } else {
392         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
393                             force_share);
394     }
395     return blk;
396 }
397 
398 
399 static int add_old_style_options(const char *fmt, QemuOpts *opts,
400                                  const char *base_filename,
401                                  const char *base_fmt)
402 {
403     Error *err = NULL;
404 
405     if (base_filename) {
406         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
407         if (err) {
408             error_report("Backing file not supported for file format '%s'",
409                          fmt);
410             error_free(err);
411             return -1;
412         }
413     }
414     if (base_fmt) {
415         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
416         if (err) {
417             error_report("Backing file format not supported for file "
418                          "format '%s'", fmt);
419             error_free(err);
420             return -1;
421         }
422     }
423     return 0;
424 }
425 
/*
 * Convert the size string @s with qemu_strtosz().
 *
 * Returns the parsed value, the negative error code from qemu_strtosz()
 * if parsing failed, or -ERANGE if the value does not fit in int64_t.
 */
static int64_t cvtnum(const char *s)
{
    uint64_t parsed;
    int rc = qemu_strtosz(s, NULL, &parsed);

    if (rc < 0) {
        return rc;
    }

    return parsed > INT64_MAX ? -ERANGE : (int64_t)parsed;
}
440 
441 static int img_create(int argc, char **argv)
442 {
443     int c;
444     uint64_t img_size = -1;
445     const char *fmt = "raw";
446     const char *base_fmt = NULL;
447     const char *filename;
448     const char *base_filename = NULL;
449     char *options = NULL;
450     Error *local_err = NULL;
451     bool quiet = false;
452     int flags = 0;
453 
454     for(;;) {
455         static const struct option long_options[] = {
456             {"help", no_argument, 0, 'h'},
457             {"object", required_argument, 0, OPTION_OBJECT},
458             {0, 0, 0, 0}
459         };
460         c = getopt_long(argc, argv, ":F:b:f:ho:qu",
461                         long_options, NULL);
462         if (c == -1) {
463             break;
464         }
465         switch(c) {
466         case ':':
467             missing_argument(argv[optind - 1]);
468             break;
469         case '?':
470             unrecognized_option(argv[optind - 1]);
471             break;
472         case 'h':
473             help();
474             break;
475         case 'F':
476             base_fmt = optarg;
477             break;
478         case 'b':
479             base_filename = optarg;
480             break;
481         case 'f':
482             fmt = optarg;
483             break;
484         case 'o':
485             if (!is_valid_option_list(optarg)) {
486                 error_report("Invalid option list: %s", optarg);
487                 goto fail;
488             }
489             if (!options) {
490                 options = g_strdup(optarg);
491             } else {
492                 char *old_options = options;
493                 options = g_strdup_printf("%s,%s", options, optarg);
494                 g_free(old_options);
495             }
496             break;
497         case 'q':
498             quiet = true;
499             break;
500         case 'u':
501             flags |= BDRV_O_NO_BACKING;
502             break;
503         case OPTION_OBJECT: {
504             QemuOpts *opts;
505             opts = qemu_opts_parse_noisily(&qemu_object_opts,
506                                            optarg, true);
507             if (!opts) {
508                 goto fail;
509             }
510         }   break;
511         }
512     }
513 
514     /* Get the filename */
515     filename = (optind < argc) ? argv[optind] : NULL;
516     if (options && has_help_option(options)) {
517         g_free(options);
518         return print_block_option_help(filename, fmt);
519     }
520 
521     if (optind >= argc) {
522         error_exit("Expecting image file name");
523     }
524     optind++;
525 
526     if (qemu_opts_foreach(&qemu_object_opts,
527                           user_creatable_add_opts_foreach,
528                           qemu_img_object_print_help, &error_fatal)) {
529         goto fail;
530     }
531 
532     /* Get image size, if specified */
533     if (optind < argc) {
534         int64_t sval;
535 
536         sval = cvtnum(argv[optind++]);
537         if (sval < 0) {
538             if (sval == -ERANGE) {
539                 error_report("Image size must be less than 8 EiB!");
540             } else {
541                 error_report("Invalid image size specified! You may use k, M, "
542                       "G, T, P or E suffixes for ");
543                 error_report("kilobytes, megabytes, gigabytes, terabytes, "
544                              "petabytes and exabytes.");
545             }
546             goto fail;
547         }
548         img_size = (uint64_t)sval;
549     }
550     if (optind != argc) {
551         error_exit("Unexpected argument: %s", argv[optind]);
552     }
553 
554     bdrv_img_create(filename, fmt, base_filename, base_fmt,
555                     options, img_size, flags, quiet, &local_err);
556     if (local_err) {
557         error_reportf_err(local_err, "%s: ", filename);
558         goto fail;
559     }
560 
561     g_free(options);
562     return 0;
563 
564 fail:
565     g_free(options);
566     return 1;
567 }
568 
569 static void dump_json_image_check(ImageCheck *check, bool quiet)
570 {
571     QString *str;
572     QObject *obj;
573     Visitor *v = qobject_output_visitor_new(&obj);
574 
575     visit_type_ImageCheck(v, NULL, &check, &error_abort);
576     visit_complete(v, &obj);
577     str = qobject_to_json_pretty(obj);
578     assert(str != NULL);
579     qprintf(quiet, "%s\n", qstring_get_str(str));
580     qobject_unref(obj);
581     visit_free(v);
582     qobject_unref(str);
583 }
584 
585 static void dump_human_image_check(ImageCheck *check, bool quiet)
586 {
587     if (!(check->corruptions || check->leaks || check->check_errors)) {
588         qprintf(quiet, "No errors were found on the image.\n");
589     } else {
590         if (check->corruptions) {
591             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
592                     "Data may be corrupted, or further writes to the image "
593                     "may corrupt it.\n",
594                     check->corruptions);
595         }
596 
597         if (check->leaks) {
598             qprintf(quiet,
599                     "\n%" PRId64 " leaked clusters were found on the image.\n"
600                     "This means waste of disk space, but no harm to data.\n",
601                     check->leaks);
602         }
603 
604         if (check->check_errors) {
605             qprintf(quiet,
606                     "\n%" PRId64
607                     " internal errors have occurred during the check.\n",
608                     check->check_errors);
609         }
610     }
611 
612     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
613         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
614                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
615                 check->allocated_clusters, check->total_clusters,
616                 check->allocated_clusters * 100.0 / check->total_clusters,
617                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
618                 check->compressed_clusters * 100.0 /
619                 check->allocated_clusters);
620     }
621 
622     if (check->image_end_offset) {
623         qprintf(quiet,
624                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
625     }
626 }
627 
628 static int collect_image_check(BlockDriverState *bs,
629                    ImageCheck *check,
630                    const char *filename,
631                    const char *fmt,
632                    int fix)
633 {
634     int ret;
635     BdrvCheckResult result;
636 
637     ret = bdrv_check(bs, &result, fix);
638     if (ret < 0) {
639         return ret;
640     }
641 
642     check->filename                 = g_strdup(filename);
643     check->format                   = g_strdup(bdrv_get_format_name(bs));
644     check->check_errors             = result.check_errors;
645     check->corruptions              = result.corruptions;
646     check->has_corruptions          = result.corruptions != 0;
647     check->leaks                    = result.leaks;
648     check->has_leaks                = result.leaks != 0;
649     check->corruptions_fixed        = result.corruptions_fixed;
650     check->has_corruptions_fixed    = result.corruptions_fixed != 0;
651     check->leaks_fixed              = result.leaks_fixed;
652     check->has_leaks_fixed          = result.leaks_fixed != 0;
653     check->image_end_offset         = result.image_end_offset;
654     check->has_image_end_offset     = result.image_end_offset != 0;
655     check->total_clusters           = result.bfi.total_clusters;
656     check->has_total_clusters       = result.bfi.total_clusters != 0;
657     check->allocated_clusters       = result.bfi.allocated_clusters;
658     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
659     check->fragmented_clusters      = result.bfi.fragmented_clusters;
660     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
661     check->compressed_clusters      = result.bfi.compressed_clusters;
662     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
663 
664     return 0;
665 }
666 
667 /*
668  * Checks an image for consistency. Exit codes:
669  *
670  *  0 - Check completed, image is good
671  *  1 - Check not completed because of internal errors
672  *  2 - Check completed, image is corrupted
673  *  3 - Check completed, image has leaked clusters, but is good otherwise
674  * 63 - Checks are not supported by the image format
675  */
676 static int img_check(int argc, char **argv)
677 {
678     int c, ret;
679     OutputFormat output_format = OFORMAT_HUMAN;
680     const char *filename, *fmt, *output, *cache;
681     BlockBackend *blk;
682     BlockDriverState *bs;
683     int fix = 0;
684     int flags = BDRV_O_CHECK;
685     bool writethrough;
686     ImageCheck *check;
687     bool quiet = false;
688     bool image_opts = false;
689     bool force_share = false;
690 
691     fmt = NULL;
692     output = NULL;
693     cache = BDRV_DEFAULT_CACHE;
694 
695     for(;;) {
696         int option_index = 0;
697         static const struct option long_options[] = {
698             {"help", no_argument, 0, 'h'},
699             {"format", required_argument, 0, 'f'},
700             {"repair", required_argument, 0, 'r'},
701             {"output", required_argument, 0, OPTION_OUTPUT},
702             {"object", required_argument, 0, OPTION_OBJECT},
703             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
704             {"force-share", no_argument, 0, 'U'},
705             {0, 0, 0, 0}
706         };
707         c = getopt_long(argc, argv, ":hf:r:T:qU",
708                         long_options, &option_index);
709         if (c == -1) {
710             break;
711         }
712         switch(c) {
713         case ':':
714             missing_argument(argv[optind - 1]);
715             break;
716         case '?':
717             unrecognized_option(argv[optind - 1]);
718             break;
719         case 'h':
720             help();
721             break;
722         case 'f':
723             fmt = optarg;
724             break;
725         case 'r':
726             flags |= BDRV_O_RDWR;
727 
728             if (!strcmp(optarg, "leaks")) {
729                 fix = BDRV_FIX_LEAKS;
730             } else if (!strcmp(optarg, "all")) {
731                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
732             } else {
733                 error_exit("Unknown option value for -r "
734                            "(expecting 'leaks' or 'all'): %s", optarg);
735             }
736             break;
737         case OPTION_OUTPUT:
738             output = optarg;
739             break;
740         case 'T':
741             cache = optarg;
742             break;
743         case 'q':
744             quiet = true;
745             break;
746         case 'U':
747             force_share = true;
748             break;
749         case OPTION_OBJECT: {
750             QemuOpts *opts;
751             opts = qemu_opts_parse_noisily(&qemu_object_opts,
752                                            optarg, true);
753             if (!opts) {
754                 return 1;
755             }
756         }   break;
757         case OPTION_IMAGE_OPTS:
758             image_opts = true;
759             break;
760         }
761     }
762     if (optind != argc - 1) {
763         error_exit("Expecting one image file name");
764     }
765     filename = argv[optind++];
766 
767     if (output && !strcmp(output, "json")) {
768         output_format = OFORMAT_JSON;
769     } else if (output && !strcmp(output, "human")) {
770         output_format = OFORMAT_HUMAN;
771     } else if (output) {
772         error_report("--output must be used with human or json as argument.");
773         return 1;
774     }
775 
776     if (qemu_opts_foreach(&qemu_object_opts,
777                           user_creatable_add_opts_foreach,
778                           qemu_img_object_print_help, &error_fatal)) {
779         return 1;
780     }
781 
782     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
783     if (ret < 0) {
784         error_report("Invalid source cache option: %s", cache);
785         return 1;
786     }
787 
788     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
789                    force_share);
790     if (!blk) {
791         return 1;
792     }
793     bs = blk_bs(blk);
794 
795     check = g_new0(ImageCheck, 1);
796     ret = collect_image_check(bs, check, filename, fmt, fix);
797 
798     if (ret == -ENOTSUP) {
799         error_report("This image format does not support checks");
800         ret = 63;
801         goto fail;
802     }
803 
804     if (check->corruptions_fixed || check->leaks_fixed) {
805         int corruptions_fixed, leaks_fixed;
806         bool has_leaks_fixed, has_corruptions_fixed;
807 
808         leaks_fixed         = check->leaks_fixed;
809         has_leaks_fixed     = check->has_leaks_fixed;
810         corruptions_fixed   = check->corruptions_fixed;
811         has_corruptions_fixed = check->has_corruptions_fixed;
812 
813         if (output_format == OFORMAT_HUMAN) {
814             qprintf(quiet,
815                     "The following inconsistencies were found and repaired:\n\n"
816                     "    %" PRId64 " leaked clusters\n"
817                     "    %" PRId64 " corruptions\n\n"
818                     "Double checking the fixed image now...\n",
819                     check->leaks_fixed,
820                     check->corruptions_fixed);
821         }
822 
823         qapi_free_ImageCheck(check);
824         check = g_new0(ImageCheck, 1);
825         ret = collect_image_check(bs, check, filename, fmt, 0);
826 
827         check->leaks_fixed          = leaks_fixed;
828         check->has_leaks_fixed      = has_leaks_fixed;
829         check->corruptions_fixed    = corruptions_fixed;
830         check->has_corruptions_fixed = has_corruptions_fixed;
831     }
832 
833     if (!ret) {
834         switch (output_format) {
835         case OFORMAT_HUMAN:
836             dump_human_image_check(check, quiet);
837             break;
838         case OFORMAT_JSON:
839             dump_json_image_check(check, quiet);
840             break;
841         }
842     }
843 
844     if (ret || check->check_errors) {
845         if (ret) {
846             error_report("Check failed: %s", strerror(-ret));
847         } else {
848             error_report("Check failed");
849         }
850         ret = 1;
851         goto fail;
852     }
853 
854     if (check->corruptions) {
855         ret = 2;
856     } else if (check->leaks) {
857         ret = 3;
858     } else {
859         ret = 0;
860     }
861 
862 fail:
863     qapi_free_ImageCheck(check);
864     blk_unref(blk);
865     return ret;
866 }
867 
868 typedef struct CommonBlockJobCBInfo {
869     BlockDriverState *bs;
870     Error **errp;
871 } CommonBlockJobCBInfo;
872 
873 static void common_block_job_cb(void *opaque, int ret)
874 {
875     CommonBlockJobCBInfo *cbi = opaque;
876 
877     if (ret < 0) {
878         error_setg_errno(cbi->errp, -ret, "Block job failed");
879     }
880 }
881 
882 static void run_block_job(BlockJob *job, Error **errp)
883 {
884     AioContext *aio_context = blk_get_aio_context(job->blk);
885     int ret = 0;
886 
887     aio_context_acquire(aio_context);
888     job_ref(&job->job);
889     do {
890         float progress = 0.0f;
891         aio_poll(aio_context, true);
892         if (job->job.progress.total) {
893             progress = (float)job->job.progress.current /
894                        job->job.progress.total * 100.f;
895         }
896         qemu_progress_print(progress, 0);
897     } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
898 
899     if (!job_is_completed(&job->job)) {
900         ret = job_complete_sync(&job->job, errp);
901     } else {
902         ret = job->job.ret;
903     }
904     job_unref(&job->job);
905     aio_context_release(aio_context);
906 
907     /* publish completion progress only when success */
908     if (!ret) {
909         qemu_progress_print(100.f, 0);
910     }
911 }
912 
913 static int img_commit(int argc, char **argv)
914 {
915     int c, ret, flags;
916     const char *filename, *fmt, *cache, *base;
917     BlockBackend *blk;
918     BlockDriverState *bs, *base_bs;
919     BlockJob *job;
920     bool progress = false, quiet = false, drop = false;
921     bool writethrough;
922     Error *local_err = NULL;
923     CommonBlockJobCBInfo cbi;
924     bool image_opts = false;
925     AioContext *aio_context;
926 
927     fmt = NULL;
928     cache = BDRV_DEFAULT_CACHE;
929     base = NULL;
930     for(;;) {
931         static const struct option long_options[] = {
932             {"help", no_argument, 0, 'h'},
933             {"object", required_argument, 0, OPTION_OBJECT},
934             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
935             {0, 0, 0, 0}
936         };
937         c = getopt_long(argc, argv, ":f:ht:b:dpq",
938                         long_options, NULL);
939         if (c == -1) {
940             break;
941         }
942         switch(c) {
943         case ':':
944             missing_argument(argv[optind - 1]);
945             break;
946         case '?':
947             unrecognized_option(argv[optind - 1]);
948             break;
949         case 'h':
950             help();
951             break;
952         case 'f':
953             fmt = optarg;
954             break;
955         case 't':
956             cache = optarg;
957             break;
958         case 'b':
959             base = optarg;
960             /* -b implies -d */
961             drop = true;
962             break;
963         case 'd':
964             drop = true;
965             break;
966         case 'p':
967             progress = true;
968             break;
969         case 'q':
970             quiet = true;
971             break;
972         case OPTION_OBJECT: {
973             QemuOpts *opts;
974             opts = qemu_opts_parse_noisily(&qemu_object_opts,
975                                            optarg, true);
976             if (!opts) {
977                 return 1;
978             }
979         }   break;
980         case OPTION_IMAGE_OPTS:
981             image_opts = true;
982             break;
983         }
984     }
985 
986     /* Progress is not shown in Quiet mode */
987     if (quiet) {
988         progress = false;
989     }
990 
991     if (optind != argc - 1) {
992         error_exit("Expecting one image file name");
993     }
994     filename = argv[optind++];
995 
996     if (qemu_opts_foreach(&qemu_object_opts,
997                           user_creatable_add_opts_foreach,
998                           qemu_img_object_print_help, &error_fatal)) {
999         return 1;
1000     }
1001 
1002     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
1003     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1004     if (ret < 0) {
1005         error_report("Invalid cache option: %s", cache);
1006         return 1;
1007     }
1008 
1009     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1010                    false);
1011     if (!blk) {
1012         return 1;
1013     }
1014     bs = blk_bs(blk);
1015 
1016     qemu_progress_init(progress, 1.f);
1017     qemu_progress_print(0.f, 100);
1018 
1019     if (base) {
1020         base_bs = bdrv_find_backing_image(bs, base);
1021         if (!base_bs) {
1022             error_setg(&local_err,
1023                        "Did not find '%s' in the backing chain of '%s'",
1024                        base, filename);
1025             goto done;
1026         }
1027     } else {
1028         /* This is different from QMP, which by default uses the deepest file in
1029          * the backing chain (i.e., the very base); however, the traditional
1030          * behavior of qemu-img commit is using the immediate backing file. */
1031         base_bs = backing_bs(bs);
1032         if (!base_bs) {
1033             error_setg(&local_err, "Image does not have a backing file");
1034             goto done;
1035         }
1036     }
1037 
1038     cbi = (CommonBlockJobCBInfo){
1039         .errp = &local_err,
1040         .bs   = bs,
1041     };
1042 
1043     aio_context = bdrv_get_aio_context(bs);
1044     aio_context_acquire(aio_context);
1045     commit_active_start("commit", bs, base_bs, JOB_DEFAULT, 0,
1046                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1047                         &cbi, false, &local_err);
1048     aio_context_release(aio_context);
1049     if (local_err) {
1050         goto done;
1051     }
1052 
1053     /* When the block job completes, the BlockBackend reference will point to
1054      * the old backing file. In order to avoid that the top image is already
1055      * deleted, so we can still empty it afterwards, increment the reference
1056      * counter here preemptively. */
1057     if (!drop) {
1058         bdrv_ref(bs);
1059     }
1060 
1061     job = block_job_get("commit");
1062     assert(job);
1063     run_block_job(job, &local_err);
1064     if (local_err) {
1065         goto unref_backing;
1066     }
1067 
1068     if (!drop && bs->drv->bdrv_make_empty) {
1069         ret = bs->drv->bdrv_make_empty(bs);
1070         if (ret) {
1071             error_setg_errno(&local_err, -ret, "Could not empty %s",
1072                              filename);
1073             goto unref_backing;
1074         }
1075     }
1076 
1077 unref_backing:
1078     if (!drop) {
1079         bdrv_unref(bs);
1080     }
1081 
1082 done:
1083     qemu_progress_end();
1084 
1085     blk_unref(blk);
1086 
1087     if (local_err) {
1088         error_report_err(local_err);
1089         return 1;
1090     }
1091 
1092     qprintf(quiet, "Image committed.\n");
1093     return 0;
1094 }
1095 
1096 /*
1097  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1098  * of the first sector boundary within buf where the sector contains a
1099  * non-zero byte.  This function is robust to a buffer that is not
1100  * sector-aligned.
1101  */
1102 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1103 {
1104     int64_t i;
1105     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1106 
1107     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1108         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1109             return i;
1110         }
1111     }
1112     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1113         return i;
1114     }
1115     return -1;
1116 }
1117 
1118 /*
1119  * Returns true iff the first sector pointed to by 'buf' contains at least
1120  * a non-NUL byte.
1121  *
1122  * 'pnum' is set to the number of sectors (including and immediately following
1123  * the first one) that are known to be in the same allocated/unallocated state.
1124  * The function will try to align the end offset to alignment boundaries so
1125  * that the request will at least end aligned and consequtive requests will
1126  * also start at an aligned offset.
1127  */
1128 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1129                                 int64_t sector_num, int alignment)
1130 {
1131     bool is_zero;
1132     int i, tail;
1133 
1134     if (n <= 0) {
1135         *pnum = 0;
1136         return 0;
1137     }
1138     is_zero = buffer_is_zero(buf, 512);
1139     for(i = 1; i < n; i++) {
1140         buf += 512;
1141         if (is_zero != buffer_is_zero(buf, 512)) {
1142             break;
1143         }
1144     }
1145 
1146     tail = (sector_num + i) & (alignment - 1);
1147     if (tail) {
1148         if (is_zero && i <= tail) {
1149             /* treat unallocated areas which only consist
1150              * of a small tail as allocated. */
1151             is_zero = false;
1152         }
1153         if (!is_zero) {
1154             /* align up end offset of allocated areas. */
1155             i += alignment - tail;
1156             i = MIN(i, n);
1157         } else {
1158             /* align down end offset of zero areas. */
1159             i -= tail;
1160         }
1161     }
1162     *pnum = i;
1163     return !is_zero;
1164 }
1165 
1166 /*
1167  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1168  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1169  * breaking up write requests for only small sparse areas.
1170  */
1171 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1172     int min, int64_t sector_num, int alignment)
1173 {
1174     int ret;
1175     int num_checked, num_used;
1176 
1177     if (n < min) {
1178         min = n;
1179     }
1180 
1181     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1182     if (!ret) {
1183         return ret;
1184     }
1185 
1186     num_used = *pnum;
1187     buf += BDRV_SECTOR_SIZE * *pnum;
1188     n -= *pnum;
1189     sector_num += *pnum;
1190     num_checked = num_used;
1191 
1192     while (n > 0) {
1193         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1194 
1195         buf += BDRV_SECTOR_SIZE * *pnum;
1196         n -= *pnum;
1197         sector_num += *pnum;
1198         num_checked += *pnum;
1199         if (ret) {
1200             num_used = num_checked;
1201         } else if (*pnum >= min) {
1202             break;
1203         }
1204     }
1205 
1206     *pnum = num_used;
1207     return 1;
1208 }
1209 
1210 /*
1211  * Compares two buffers sector by sector. Returns 0 if the first
1212  * sector of each buffer matches, non-zero otherwise.
1213  *
1214  * pnum is set to the sector-aligned size of the buffer prefix that
1215  * has the same matching status as the first sector.
1216  */
1217 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1218                            int64_t bytes, int64_t *pnum)
1219 {
1220     bool res;
1221     int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
1222 
1223     assert(bytes > 0);
1224 
1225     res = !!memcmp(buf1, buf2, i);
1226     while (i < bytes) {
1227         int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
1228 
1229         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1230             break;
1231         }
1232         i += len;
1233     }
1234 
1235     *pnum = i;
1236     return res;
1237 }
1238 
/*
 * Size in bytes of the I/O buffers allocated by img_compare; its reads are
 * clamped to this length.
 */
#define IO_BUF_SIZE (2 * MiB)
1240 
1241 /*
1242  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1243  *
1244  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1245  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1246  * failure), and 4 on error (the exit status for read errors), after emitting
1247  * an error message.
1248  *
1249  * @param blk:  BlockBackend for the image
1250  * @param offset: Starting offset to check
1251  * @param bytes: Number of bytes to check
1252  * @param filename: Name of disk file we are checking (logging purpose)
1253  * @param buffer: Allocated buffer for storing read data
1254  * @param quiet: Flag for quiet mode
1255  */
1256 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1257                                int64_t bytes, const char *filename,
1258                                uint8_t *buffer, bool quiet)
1259 {
1260     int ret = 0;
1261     int64_t idx;
1262 
1263     ret = blk_pread(blk, offset, buffer, bytes);
1264     if (ret < 0) {
1265         error_report("Error while reading offset %" PRId64 " of %s: %s",
1266                      offset, filename, strerror(-ret));
1267         return 4;
1268     }
1269     idx = find_nonzero(buffer, bytes);
1270     if (idx >= 0) {
1271         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1272                 offset + idx);
1273         return 1;
1274     }
1275 
1276     return 0;
1277 }
1278 
1279 /*
1280  * Compares two images. Exit codes:
1281  *
1282  * 0 - Images are identical
1283  * 1 - Images differ
1284  * >1 - Error occurred
1285  */
1286 static int img_compare(int argc, char **argv)
1287 {
1288     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1289     BlockBackend *blk1, *blk2;
1290     BlockDriverState *bs1, *bs2;
1291     int64_t total_size1, total_size2;
1292     uint8_t *buf1 = NULL, *buf2 = NULL;
1293     int64_t pnum1, pnum2;
1294     int allocated1, allocated2;
1295     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1296     bool progress = false, quiet = false, strict = false;
1297     int flags;
1298     bool writethrough;
1299     int64_t total_size;
1300     int64_t offset = 0;
1301     int64_t chunk;
1302     int c;
1303     uint64_t progress_base;
1304     bool image_opts = false;
1305     bool force_share = false;
1306 
1307     cache = BDRV_DEFAULT_CACHE;
1308     for (;;) {
1309         static const struct option long_options[] = {
1310             {"help", no_argument, 0, 'h'},
1311             {"object", required_argument, 0, OPTION_OBJECT},
1312             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1313             {"force-share", no_argument, 0, 'U'},
1314             {0, 0, 0, 0}
1315         };
1316         c = getopt_long(argc, argv, ":hf:F:T:pqsU",
1317                         long_options, NULL);
1318         if (c == -1) {
1319             break;
1320         }
1321         switch (c) {
1322         case ':':
1323             missing_argument(argv[optind - 1]);
1324             break;
1325         case '?':
1326             unrecognized_option(argv[optind - 1]);
1327             break;
1328         case 'h':
1329             help();
1330             break;
1331         case 'f':
1332             fmt1 = optarg;
1333             break;
1334         case 'F':
1335             fmt2 = optarg;
1336             break;
1337         case 'T':
1338             cache = optarg;
1339             break;
1340         case 'p':
1341             progress = true;
1342             break;
1343         case 'q':
1344             quiet = true;
1345             break;
1346         case 's':
1347             strict = true;
1348             break;
1349         case 'U':
1350             force_share = true;
1351             break;
1352         case OPTION_OBJECT: {
1353             QemuOpts *opts;
1354             opts = qemu_opts_parse_noisily(&qemu_object_opts,
1355                                            optarg, true);
1356             if (!opts) {
1357                 ret = 2;
1358                 goto out4;
1359             }
1360         }   break;
1361         case OPTION_IMAGE_OPTS:
1362             image_opts = true;
1363             break;
1364         }
1365     }
1366 
1367     /* Progress is not shown in Quiet mode */
1368     if (quiet) {
1369         progress = false;
1370     }
1371 
1372 
1373     if (optind != argc - 2) {
1374         error_exit("Expecting two image file names");
1375     }
1376     filename1 = argv[optind++];
1377     filename2 = argv[optind++];
1378 
1379     if (qemu_opts_foreach(&qemu_object_opts,
1380                           user_creatable_add_opts_foreach,
1381                           qemu_img_object_print_help, &error_fatal)) {
1382         ret = 2;
1383         goto out4;
1384     }
1385 
1386     /* Initialize before goto out */
1387     qemu_progress_init(progress, 2.0);
1388 
1389     flags = 0;
1390     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1391     if (ret < 0) {
1392         error_report("Invalid source cache option: %s", cache);
1393         ret = 2;
1394         goto out3;
1395     }
1396 
1397     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
1398                     force_share);
1399     if (!blk1) {
1400         ret = 2;
1401         goto out3;
1402     }
1403 
1404     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
1405                     force_share);
1406     if (!blk2) {
1407         ret = 2;
1408         goto out2;
1409     }
1410     bs1 = blk_bs(blk1);
1411     bs2 = blk_bs(blk2);
1412 
1413     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1414     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1415     total_size1 = blk_getlength(blk1);
1416     if (total_size1 < 0) {
1417         error_report("Can't get size of %s: %s",
1418                      filename1, strerror(-total_size1));
1419         ret = 4;
1420         goto out;
1421     }
1422     total_size2 = blk_getlength(blk2);
1423     if (total_size2 < 0) {
1424         error_report("Can't get size of %s: %s",
1425                      filename2, strerror(-total_size2));
1426         ret = 4;
1427         goto out;
1428     }
1429     total_size = MIN(total_size1, total_size2);
1430     progress_base = MAX(total_size1, total_size2);
1431 
1432     qemu_progress_print(0, 100);
1433 
1434     if (strict && total_size1 != total_size2) {
1435         ret = 1;
1436         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1437         goto out;
1438     }
1439 
1440     while (offset < total_size) {
1441         int status1, status2;
1442 
1443         status1 = bdrv_block_status_above(bs1, NULL, offset,
1444                                           total_size1 - offset, &pnum1, NULL,
1445                                           NULL);
1446         if (status1 < 0) {
1447             ret = 3;
1448             error_report("Sector allocation test failed for %s", filename1);
1449             goto out;
1450         }
1451         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1452 
1453         status2 = bdrv_block_status_above(bs2, NULL, offset,
1454                                           total_size2 - offset, &pnum2, NULL,
1455                                           NULL);
1456         if (status2 < 0) {
1457             ret = 3;
1458             error_report("Sector allocation test failed for %s", filename2);
1459             goto out;
1460         }
1461         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1462 
1463         assert(pnum1 && pnum2);
1464         chunk = MIN(pnum1, pnum2);
1465 
1466         if (strict) {
1467             if (status1 != status2) {
1468                 ret = 1;
1469                 qprintf(quiet, "Strict mode: Offset %" PRId64
1470                         " block status mismatch!\n", offset);
1471                 goto out;
1472             }
1473         }
1474         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1475             /* nothing to do */
1476         } else if (allocated1 == allocated2) {
1477             if (allocated1) {
1478                 int64_t pnum;
1479 
1480                 chunk = MIN(chunk, IO_BUF_SIZE);
1481                 ret = blk_pread(blk1, offset, buf1, chunk);
1482                 if (ret < 0) {
1483                     error_report("Error while reading offset %" PRId64
1484                                  " of %s: %s",
1485                                  offset, filename1, strerror(-ret));
1486                     ret = 4;
1487                     goto out;
1488                 }
1489                 ret = blk_pread(blk2, offset, buf2, chunk);
1490                 if (ret < 0) {
1491                     error_report("Error while reading offset %" PRId64
1492                                  " of %s: %s",
1493                                  offset, filename2, strerror(-ret));
1494                     ret = 4;
1495                     goto out;
1496                 }
1497                 ret = compare_buffers(buf1, buf2, chunk, &pnum);
1498                 if (ret || pnum != chunk) {
1499                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1500                             offset + (ret ? 0 : pnum));
1501                     ret = 1;
1502                     goto out;
1503                 }
1504             }
1505         } else {
1506             chunk = MIN(chunk, IO_BUF_SIZE);
1507             if (allocated1) {
1508                 ret = check_empty_sectors(blk1, offset, chunk,
1509                                           filename1, buf1, quiet);
1510             } else {
1511                 ret = check_empty_sectors(blk2, offset, chunk,
1512                                           filename2, buf1, quiet);
1513             }
1514             if (ret) {
1515                 goto out;
1516             }
1517         }
1518         offset += chunk;
1519         qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1520     }
1521 
1522     if (total_size1 != total_size2) {
1523         BlockBackend *blk_over;
1524         const char *filename_over;
1525 
1526         qprintf(quiet, "Warning: Image size mismatch!\n");
1527         if (total_size1 > total_size2) {
1528             blk_over = blk1;
1529             filename_over = filename1;
1530         } else {
1531             blk_over = blk2;
1532             filename_over = filename2;
1533         }
1534 
1535         while (offset < progress_base) {
1536             ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
1537                                           progress_base - offset, &chunk,
1538                                           NULL, NULL);
1539             if (ret < 0) {
1540                 ret = 3;
1541                 error_report("Sector allocation test failed for %s",
1542                              filename_over);
1543                 goto out;
1544 
1545             }
1546             if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
1547                 chunk = MIN(chunk, IO_BUF_SIZE);
1548                 ret = check_empty_sectors(blk_over, offset, chunk,
1549                                           filename_over, buf1, quiet);
1550                 if (ret) {
1551                     goto out;
1552                 }
1553             }
1554             offset += chunk;
1555             qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1556         }
1557     }
1558 
1559     qprintf(quiet, "Images are identical.\n");
1560     ret = 0;
1561 
1562 out:
1563     qemu_vfree(buf1);
1564     qemu_vfree(buf2);
1565     blk_unref(blk2);
1566 out2:
1567     blk_unref(blk1);
1568 out3:
1569     qemu_progress_end();
1570 out4:
1571     return ret;
1572 }
1573 
/*
 * Classification of a range of source sectors during convert, as consumed by
 * convert_co_write().
 */
enum ImgConvertBlockStatus {
    /* Holds data; written out unless it is detected to be all zeroes */
    BLK_DATA,
    /* Reads as zeroes; zeroes are written unless the target has zero init */
    BLK_ZERO,
    /* Nothing is written, so the target's backing file stays visible */
    BLK_BACKING_FILE,
};
1579 
1580 #define MAX_COROUTINES 16
1581 
1582 typedef struct ImgConvertState {
1583     BlockBackend **src;
1584     int64_t *src_sectors;
1585     int src_num;
1586     int64_t total_sectors;
1587     int64_t allocated_sectors;
1588     int64_t allocated_done;
1589     int64_t sector_num;
1590     int64_t wr_offs;
1591     enum ImgConvertBlockStatus status;
1592     int64_t sector_next_status;
1593     BlockBackend *target;
1594     bool has_zero_init;
1595     bool compressed;
1596     bool unallocated_blocks_are_zero;
1597     bool target_is_new;
1598     bool target_has_backing;
1599     int64_t target_backing_sectors; /* negative if unknown */
1600     bool wr_in_order;
1601     bool copy_range;
1602     bool salvage;
1603     bool quiet;
1604     int min_sparse;
1605     int alignment;
1606     size_t cluster_sectors;
1607     size_t buf_sectors;
1608     long num_coroutines;
1609     int running_coroutines;
1610     Coroutine *co[MAX_COROUTINES];
1611     int64_t wait_sector_num[MAX_COROUTINES];
1612     CoMutex lock;
1613     int ret;
1614 } ImgConvertState;
1615 
1616 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1617                                 int *src_cur, int64_t *src_cur_offset)
1618 {
1619     *src_cur = 0;
1620     *src_cur_offset = 0;
1621     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1622         *src_cur_offset += s->src_sectors[*src_cur];
1623         (*src_cur)++;
1624         assert(*src_cur < s->src_num);
1625     }
1626 }
1627 
1628 static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1629 {
1630     int64_t src_cur_offset;
1631     int ret, n, src_cur;
1632     bool post_backing_zero = false;
1633 
1634     convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1635 
1636     assert(s->total_sectors > sector_num);
1637     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1638 
1639     if (s->target_backing_sectors >= 0) {
1640         if (sector_num >= s->target_backing_sectors) {
1641             post_backing_zero = s->unallocated_blocks_are_zero;
1642         } else if (sector_num + n > s->target_backing_sectors) {
1643             /* Split requests around target_backing_sectors (because
1644              * starting from there, zeros are handled differently) */
1645             n = s->target_backing_sectors - sector_num;
1646         }
1647     }
1648 
1649     if (s->sector_next_status <= sector_num) {
1650         uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
1651         int64_t count;
1652 
1653         do {
1654             count = n * BDRV_SECTOR_SIZE;
1655 
1656             if (s->target_has_backing) {
1657                 ret = bdrv_block_status(blk_bs(s->src[src_cur]), offset,
1658                                         count, &count, NULL, NULL);
1659             } else {
1660                 ret = bdrv_block_status_above(blk_bs(s->src[src_cur]), NULL,
1661                                               offset, count, &count, NULL,
1662                                               NULL);
1663             }
1664 
1665             if (ret < 0) {
1666                 if (s->salvage) {
1667                     if (n == 1) {
1668                         if (!s->quiet) {
1669                             warn_report("error while reading block status at "
1670                                         "offset %" PRIu64 ": %s", offset,
1671                                         strerror(-ret));
1672                         }
1673                         /* Just try to read the data, then */
1674                         ret = BDRV_BLOCK_DATA;
1675                         count = BDRV_SECTOR_SIZE;
1676                     } else {
1677                         /* Retry on a shorter range */
1678                         n = DIV_ROUND_UP(n, 4);
1679                     }
1680                 } else {
1681                     error_report("error while reading block status at offset "
1682                                  "%" PRIu64 ": %s", offset, strerror(-ret));
1683                     return ret;
1684                 }
1685             }
1686         } while (ret < 0);
1687 
1688         n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
1689 
1690         if (ret & BDRV_BLOCK_ZERO) {
1691             s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
1692         } else if (ret & BDRV_BLOCK_DATA) {
1693             s->status = BLK_DATA;
1694         } else {
1695             s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
1696         }
1697 
1698         s->sector_next_status = sector_num + n;
1699     }
1700 
1701     n = MIN(n, s->sector_next_status - sector_num);
1702     if (s->status == BLK_DATA) {
1703         n = MIN(n, s->buf_sectors);
1704     }
1705 
1706     /* We need to write complete clusters for compressed images, so if an
1707      * unallocated area is shorter than that, we must consider the whole
1708      * cluster allocated. */
1709     if (s->compressed) {
1710         if (n < s->cluster_sectors) {
1711             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1712             s->status = BLK_DATA;
1713         } else {
1714             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1715         }
1716     }
1717 
1718     return n;
1719 }
1720 
1721 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1722                                         int nb_sectors, uint8_t *buf)
1723 {
1724     uint64_t single_read_until = 0;
1725     int n, ret;
1726 
1727     assert(nb_sectors <= s->buf_sectors);
1728     while (nb_sectors > 0) {
1729         BlockBackend *blk;
1730         int src_cur;
1731         int64_t bs_sectors, src_cur_offset;
1732         uint64_t offset;
1733 
1734         /* In the case of compression with multiple source files, we can get a
1735          * nb_sectors that spreads into the next part. So we must be able to
1736          * read across multiple BDSes for one convert_read() call. */
1737         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1738         blk = s->src[src_cur];
1739         bs_sectors = s->src_sectors[src_cur];
1740 
1741         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1742 
1743         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1744         if (single_read_until > offset) {
1745             n = 1;
1746         }
1747 
1748         ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1749         if (ret < 0) {
1750             if (s->salvage) {
1751                 if (n > 1) {
1752                     single_read_until = offset + (n << BDRV_SECTOR_BITS);
1753                     continue;
1754                 } else {
1755                     if (!s->quiet) {
1756                         warn_report("error while reading offset %" PRIu64
1757                                     ": %s", offset, strerror(-ret));
1758                     }
1759                     memset(buf, 0, BDRV_SECTOR_SIZE);
1760                 }
1761             } else {
1762                 return ret;
1763             }
1764         }
1765 
1766         sector_num += n;
1767         nb_sectors -= n;
1768         buf += n * BDRV_SECTOR_SIZE;
1769     }
1770 
1771     return 0;
1772 }
1773 
1774 
1775 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1776                                          int nb_sectors, uint8_t *buf,
1777                                          enum ImgConvertBlockStatus status)
1778 {
1779     int ret;
1780 
1781     while (nb_sectors > 0) {
1782         int n = nb_sectors;
1783         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1784 
1785         switch (status) {
1786         case BLK_BACKING_FILE:
1787             /* If we have a backing file, leave clusters unallocated that are
1788              * unallocated in the source image, so that the backing file is
1789              * visible at the respective offset. */
1790             assert(s->target_has_backing);
1791             break;
1792 
1793         case BLK_DATA:
1794             /* If we're told to keep the target fully allocated (-S 0) or there
1795              * is real non-zero data, we must write it. Otherwise we can treat
1796              * it as zero sectors.
1797              * Compressed clusters need to be written as a whole, so in that
1798              * case we can only save the write if the buffer is completely
1799              * zeroed. */
1800             if (!s->min_sparse ||
1801                 (!s->compressed &&
1802                  is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1803                                           sector_num, s->alignment)) ||
1804                 (s->compressed &&
1805                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1806             {
1807                 ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1808                                     n << BDRV_SECTOR_BITS, buf, flags);
1809                 if (ret < 0) {
1810                     return ret;
1811                 }
1812                 break;
1813             }
1814             /* fall-through */
1815 
1816         case BLK_ZERO:
1817             if (s->has_zero_init) {
1818                 assert(!s->target_has_backing);
1819                 break;
1820             }
1821             ret = blk_co_pwrite_zeroes(s->target,
1822                                        sector_num << BDRV_SECTOR_BITS,
1823                                        n << BDRV_SECTOR_BITS,
1824                                        BDRV_REQ_MAY_UNMAP);
1825             if (ret < 0) {
1826                 return ret;
1827             }
1828             break;
1829         }
1830 
1831         sector_num += n;
1832         nb_sectors -= n;
1833         buf += n * BDRV_SECTOR_SIZE;
1834     }
1835 
1836     return 0;
1837 }
1838 
1839 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1840                                               int nb_sectors)
1841 {
1842     int n, ret;
1843 
1844     while (nb_sectors > 0) {
1845         BlockBackend *blk;
1846         int src_cur;
1847         int64_t bs_sectors, src_cur_offset;
1848         int64_t offset;
1849 
1850         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1851         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1852         blk = s->src[src_cur];
1853         bs_sectors = s->src_sectors[src_cur];
1854 
1855         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1856 
1857         ret = blk_co_copy_range(blk, offset, s->target,
1858                                 sector_num << BDRV_SECTOR_BITS,
1859                                 n << BDRV_SECTOR_BITS, 0, 0);
1860         if (ret < 0) {
1861             return ret;
1862         }
1863 
1864         sector_num += n;
1865         nb_sectors -= n;
1866     }
1867     return 0;
1868 }
1869 
1870 static void coroutine_fn convert_co_do_copy(void *opaque)
1871 {
1872     ImgConvertState *s = opaque;
1873     uint8_t *buf = NULL;
1874     int ret, i;
1875     int index = -1;
1876 
1877     for (i = 0; i < s->num_coroutines; i++) {
1878         if (s->co[i] == qemu_coroutine_self()) {
1879             index = i;
1880             break;
1881         }
1882     }
1883     assert(index >= 0);
1884 
1885     s->running_coroutines++;
1886     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1887 
1888     while (1) {
1889         int n;
1890         int64_t sector_num;
1891         enum ImgConvertBlockStatus status;
1892         bool copy_range;
1893 
1894         qemu_co_mutex_lock(&s->lock);
1895         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1896             qemu_co_mutex_unlock(&s->lock);
1897             break;
1898         }
1899         n = convert_iteration_sectors(s, s->sector_num);
1900         if (n < 0) {
1901             qemu_co_mutex_unlock(&s->lock);
1902             s->ret = n;
1903             break;
1904         }
1905         /* save current sector and allocation status to local variables */
1906         sector_num = s->sector_num;
1907         status = s->status;
1908         if (!s->min_sparse && s->status == BLK_ZERO) {
1909             n = MIN(n, s->buf_sectors);
1910         }
1911         /* increment global sector counter so that other coroutines can
1912          * already continue reading beyond this request */
1913         s->sector_num += n;
1914         qemu_co_mutex_unlock(&s->lock);
1915 
1916         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
1917             s->allocated_done += n;
1918             qemu_progress_print(100.0 * s->allocated_done /
1919                                         s->allocated_sectors, 0);
1920         }
1921 
1922 retry:
1923         copy_range = s->copy_range && s->status == BLK_DATA;
1924         if (status == BLK_DATA && !copy_range) {
1925             ret = convert_co_read(s, sector_num, n, buf);
1926             if (ret < 0) {
1927                 error_report("error while reading sector %" PRId64
1928                              ": %s", sector_num, strerror(-ret));
1929                 s->ret = ret;
1930             }
1931         } else if (!s->min_sparse && status == BLK_ZERO) {
1932             status = BLK_DATA;
1933             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
1934         }
1935 
1936         if (s->wr_in_order) {
1937             /* keep writes in order */
1938             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
1939                 s->wait_sector_num[index] = sector_num;
1940                 qemu_coroutine_yield();
1941             }
1942             s->wait_sector_num[index] = -1;
1943         }
1944 
1945         if (s->ret == -EINPROGRESS) {
1946             if (copy_range) {
1947                 ret = convert_co_copy_range(s, sector_num, n);
1948                 if (ret) {
1949                     s->copy_range = false;
1950                     goto retry;
1951                 }
1952             } else {
1953                 ret = convert_co_write(s, sector_num, n, buf, status);
1954             }
1955             if (ret < 0) {
1956                 error_report("error while writing sector %" PRId64
1957                              ": %s", sector_num, strerror(-ret));
1958                 s->ret = ret;
1959             }
1960         }
1961 
1962         if (s->wr_in_order) {
1963             /* reenter the coroutine that might have waited
1964              * for this write to complete */
1965             s->wr_offs = sector_num + n;
1966             for (i = 0; i < s->num_coroutines; i++) {
1967                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
1968                     /*
1969                      * A -> B -> A cannot occur because A has
1970                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
1971                      * B will never enter A during this time window.
1972                      */
1973                     qemu_coroutine_enter(s->co[i]);
1974                     break;
1975                 }
1976             }
1977         }
1978     }
1979 
1980     qemu_vfree(buf);
1981     s->co[index] = NULL;
1982     s->running_coroutines--;
1983     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
1984         /* the convert job finished successfully */
1985         s->ret = 0;
1986     }
1987 }
1988 
1989 static int convert_do_copy(ImgConvertState *s)
1990 {
1991     int ret, i, n;
1992     int64_t sector_num = 0;
1993 
1994     /* Check whether we have zero initialisation or can get it efficiently */
1995     if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
1996         !s->target_has_backing) {
1997         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
1998     }
1999 
2000     if (!s->has_zero_init && !s->target_has_backing &&
2001         bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
2002     {
2003         ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
2004         if (ret == 0) {
2005             s->has_zero_init = true;
2006         }
2007     }
2008 
2009     /* Allocate buffer for copied data. For compressed images, only one cluster
2010      * can be copied at a time. */
2011     if (s->compressed) {
2012         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2013             error_report("invalid cluster size");
2014             return -EINVAL;
2015         }
2016         s->buf_sectors = s->cluster_sectors;
2017     }
2018 
2019     while (sector_num < s->total_sectors) {
2020         n = convert_iteration_sectors(s, sector_num);
2021         if (n < 0) {
2022             return n;
2023         }
2024         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2025         {
2026             s->allocated_sectors += n;
2027         }
2028         sector_num += n;
2029     }
2030 
2031     /* Do the copy */
2032     s->sector_next_status = 0;
2033     s->ret = -EINPROGRESS;
2034 
2035     qemu_co_mutex_init(&s->lock);
2036     for (i = 0; i < s->num_coroutines; i++) {
2037         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2038         s->wait_sector_num[i] = -1;
2039         qemu_coroutine_enter(s->co[i]);
2040     }
2041 
2042     while (s->running_coroutines) {
2043         main_loop_wait(false);
2044     }
2045 
2046     if (s->compressed && !s->ret) {
2047         /* signal EOF to align */
2048         ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
2049         if (ret < 0) {
2050             return ret;
2051         }
2052     }
2053 
2054     return s->ret;
2055 }
2056 
2057 #define MAX_BUF_SECTORS 32768
2058 
2059 static int img_convert(int argc, char **argv)
2060 {
2061     int c, bs_i, flags, src_flags = 0;
2062     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2063                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2064                *out_filename, *out_baseimg_param, *snapshot_name = NULL;
2065     BlockDriver *drv = NULL, *proto_drv = NULL;
2066     BlockDriverInfo bdi;
2067     BlockDriverState *out_bs;
2068     QemuOpts *opts = NULL, *sn_opts = NULL;
2069     QemuOptsList *create_opts = NULL;
2070     QDict *open_opts = NULL;
2071     char *options = NULL;
2072     Error *local_err = NULL;
2073     bool writethrough, src_writethrough, image_opts = false,
2074          skip_create = false, progress = false, tgt_image_opts = false;
2075     int64_t ret = -EINVAL;
2076     bool force_share = false;
2077     bool explict_min_sparse = false;
2078 
2079     ImgConvertState s = (ImgConvertState) {
2080         /* Need at least 4k of zeros for sparse detection */
2081         .min_sparse         = 8,
2082         .copy_range         = false,
2083         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2084         .wr_in_order        = true,
2085         .num_coroutines     = 8,
2086     };
2087 
2088     for(;;) {
2089         static const struct option long_options[] = {
2090             {"help", no_argument, 0, 'h'},
2091             {"object", required_argument, 0, OPTION_OBJECT},
2092             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2093             {"force-share", no_argument, 0, 'U'},
2094             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2095             {"salvage", no_argument, 0, OPTION_SALVAGE},
2096             {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2097             {0, 0, 0, 0}
2098         };
2099         c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU",
2100                         long_options, NULL);
2101         if (c == -1) {
2102             break;
2103         }
2104         switch(c) {
2105         case ':':
2106             missing_argument(argv[optind - 1]);
2107             break;
2108         case '?':
2109             unrecognized_option(argv[optind - 1]);
2110             break;
2111         case 'h':
2112             help();
2113             break;
2114         case 'f':
2115             fmt = optarg;
2116             break;
2117         case 'O':
2118             out_fmt = optarg;
2119             break;
2120         case 'B':
2121             out_baseimg = optarg;
2122             break;
2123         case 'C':
2124             s.copy_range = true;
2125             break;
2126         case 'c':
2127             s.compressed = true;
2128             break;
2129         case 'o':
2130             if (!is_valid_option_list(optarg)) {
2131                 error_report("Invalid option list: %s", optarg);
2132                 goto fail_getopt;
2133             }
2134             if (!options) {
2135                 options = g_strdup(optarg);
2136             } else {
2137                 char *old_options = options;
2138                 options = g_strdup_printf("%s,%s", options, optarg);
2139                 g_free(old_options);
2140             }
2141             break;
2142         case 'l':
2143             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2144                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2145                                                   optarg, false);
2146                 if (!sn_opts) {
2147                     error_report("Failed in parsing snapshot param '%s'",
2148                                  optarg);
2149                     goto fail_getopt;
2150                 }
2151             } else {
2152                 snapshot_name = optarg;
2153             }
2154             break;
2155         case 'S':
2156         {
2157             int64_t sval;
2158 
2159             sval = cvtnum(optarg);
2160             if (sval < 0 || !QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2161                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2162                 error_report("Invalid buffer size for sparse output specified. "
2163                     "Valid sizes are multiples of %llu up to %llu. Select "
2164                     "0 to disable sparse detection (fully allocates output).",
2165                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2166                 goto fail_getopt;
2167             }
2168 
2169             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2170             explict_min_sparse = true;
2171             break;
2172         }
2173         case 'p':
2174             progress = true;
2175             break;
2176         case 't':
2177             cache = optarg;
2178             break;
2179         case 'T':
2180             src_cache = optarg;
2181             break;
2182         case 'q':
2183             s.quiet = true;
2184             break;
2185         case 'n':
2186             skip_create = true;
2187             break;
2188         case 'm':
2189             if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2190                 s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2191                 error_report("Invalid number of coroutines. Allowed number of"
2192                              " coroutines is between 1 and %d", MAX_COROUTINES);
2193                 goto fail_getopt;
2194             }
2195             break;
2196         case 'W':
2197             s.wr_in_order = false;
2198             break;
2199         case 'U':
2200             force_share = true;
2201             break;
2202         case OPTION_OBJECT: {
2203             QemuOpts *object_opts;
2204             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
2205                                                   optarg, true);
2206             if (!object_opts) {
2207                 goto fail_getopt;
2208             }
2209             break;
2210         }
2211         case OPTION_IMAGE_OPTS:
2212             image_opts = true;
2213             break;
2214         case OPTION_SALVAGE:
2215             s.salvage = true;
2216             break;
2217         case OPTION_TARGET_IMAGE_OPTS:
2218             tgt_image_opts = true;
2219             break;
2220         case OPTION_TARGET_IS_ZERO:
2221             /*
2222              * The user asserting that the target is blank has the
2223              * same effect as the target driver supporting zero
2224              * initialisation.
2225              */
2226             s.has_zero_init = true;
2227             break;
2228         }
2229     }
2230 
2231     if (!out_fmt && !tgt_image_opts) {
2232         out_fmt = "raw";
2233     }
2234 
2235     if (qemu_opts_foreach(&qemu_object_opts,
2236                           user_creatable_add_opts_foreach,
2237                           qemu_img_object_print_help, &error_fatal)) {
2238         goto fail_getopt;
2239     }
2240 
2241     if (s.compressed && s.copy_range) {
2242         error_report("Cannot enable copy offloading when -c is used");
2243         goto fail_getopt;
2244     }
2245 
2246     if (explict_min_sparse && s.copy_range) {
2247         error_report("Cannot enable copy offloading when -S is used");
2248         goto fail_getopt;
2249     }
2250 
2251     if (s.copy_range && s.salvage) {
2252         error_report("Cannot use copy offloading in salvaging mode");
2253         goto fail_getopt;
2254     }
2255 
2256     if (tgt_image_opts && !skip_create) {
2257         error_report("--target-image-opts requires use of -n flag");
2258         goto fail_getopt;
2259     }
2260 
2261     if (skip_create && options) {
2262         warn_report("-o has no effect when skipping image creation");
2263         warn_report("This will become an error in future QEMU versions.");
2264     }
2265 
2266     if (s.has_zero_init && !skip_create) {
2267         error_report("--target-is-zero requires use of -n flag");
2268         goto fail_getopt;
2269     }
2270 
2271     s.src_num = argc - optind - 1;
2272     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2273 
2274     if (options && has_help_option(options)) {
2275         if (out_fmt) {
2276             ret = print_block_option_help(out_filename, out_fmt);
2277             goto fail_getopt;
2278         } else {
2279             error_report("Option help requires a format be specified");
2280             goto fail_getopt;
2281         }
2282     }
2283 
2284     if (s.src_num < 1) {
2285         error_report("Must specify image file name");
2286         goto fail_getopt;
2287     }
2288 
2289 
2290     /* ret is still -EINVAL until here */
2291     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2292     if (ret < 0) {
2293         error_report("Invalid source cache option: %s", src_cache);
2294         goto fail_getopt;
2295     }
2296 
2297     /* Initialize before goto out */
2298     if (s.quiet) {
2299         progress = false;
2300     }
2301     qemu_progress_init(progress, 1.0);
2302     qemu_progress_print(0, 100);
2303 
2304     s.src = g_new0(BlockBackend *, s.src_num);
2305     s.src_sectors = g_new(int64_t, s.src_num);
2306 
2307     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2308         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2309                                fmt, src_flags, src_writethrough, s.quiet,
2310                                force_share);
2311         if (!s.src[bs_i]) {
2312             ret = -1;
2313             goto out;
2314         }
2315         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2316         if (s.src_sectors[bs_i] < 0) {
2317             error_report("Could not get size of %s: %s",
2318                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2319             ret = -1;
2320             goto out;
2321         }
2322         s.total_sectors += s.src_sectors[bs_i];
2323     }
2324 
2325     if (sn_opts) {
2326         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2327                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2328                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2329                                &local_err);
2330     } else if (snapshot_name != NULL) {
2331         if (s.src_num > 1) {
2332             error_report("No support for concatenating multiple snapshot");
2333             ret = -1;
2334             goto out;
2335         }
2336 
2337         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2338                                              &local_err);
2339     }
2340     if (local_err) {
2341         error_reportf_err(local_err, "Failed to load snapshot: ");
2342         ret = -1;
2343         goto out;
2344     }
2345 
2346     if (!skip_create) {
2347         /* Find driver and parse its options */
2348         drv = bdrv_find_format(out_fmt);
2349         if (!drv) {
2350             error_report("Unknown file format '%s'", out_fmt);
2351             ret = -1;
2352             goto out;
2353         }
2354 
2355         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2356         if (!proto_drv) {
2357             error_report_err(local_err);
2358             ret = -1;
2359             goto out;
2360         }
2361 
2362         if (!drv->create_opts) {
2363             error_report("Format driver '%s' does not support image creation",
2364                          drv->format_name);
2365             ret = -1;
2366             goto out;
2367         }
2368 
2369         if (!proto_drv->create_opts) {
2370             error_report("Protocol driver '%s' does not support image creation",
2371                          proto_drv->format_name);
2372             ret = -1;
2373             goto out;
2374         }
2375 
2376         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2377         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2378 
2379         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2380         if (options) {
2381             qemu_opts_do_parse(opts, options, NULL, &local_err);
2382             if (local_err) {
2383                 error_report_err(local_err);
2384                 ret = -1;
2385                 goto out;
2386             }
2387         }
2388 
2389         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s.total_sectors * 512,
2390                             &error_abort);
2391         ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
2392         if (ret < 0) {
2393             goto out;
2394         }
2395     }
2396 
2397     /* Get backing file name if -o backing_file was used */
2398     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2399     if (out_baseimg_param) {
2400         out_baseimg = out_baseimg_param;
2401     }
2402     s.target_has_backing = (bool) out_baseimg;
2403 
2404     if (s.has_zero_init && s.target_has_backing) {
2405         error_report("Cannot use --target-is-zero when the destination "
2406                      "image has a backing file");
2407         goto out;
2408     }
2409 
2410     if (s.src_num > 1 && out_baseimg) {
2411         error_report("Having a backing file for the target makes no sense when "
2412                      "concatenating multiple input images");
2413         ret = -1;
2414         goto out;
2415     }
2416 
2417     /* Check if compression is supported */
2418     if (s.compressed) {
2419         bool encryption =
2420             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2421         const char *encryptfmt =
2422             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2423         const char *preallocation =
2424             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2425 
2426         if (drv && !block_driver_can_compress(drv)) {
2427             error_report("Compression not supported for this file format");
2428             ret = -1;
2429             goto out;
2430         }
2431 
2432         if (encryption || encryptfmt) {
2433             error_report("Compression and encryption not supported at "
2434                          "the same time");
2435             ret = -1;
2436             goto out;
2437         }
2438 
2439         if (preallocation
2440             && strcmp(preallocation, "off"))
2441         {
2442             error_report("Compression and preallocation not supported at "
2443                          "the same time");
2444             ret = -1;
2445             goto out;
2446         }
2447     }
2448 
2449     /*
2450      * The later open call will need any decryption secrets, and
2451      * bdrv_create() will purge "opts", so extract them now before
2452      * they are lost.
2453      */
2454     if (!skip_create) {
2455         open_opts = qdict_new();
2456         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2457     }
2458 
2459     if (!skip_create) {
2460         /* Create the new image */
2461         ret = bdrv_create(drv, out_filename, opts, &local_err);
2462         if (ret < 0) {
2463             error_reportf_err(local_err, "%s: error while converting %s: ",
2464                               out_filename, out_fmt);
2465             goto out;
2466         }
2467     }
2468 
2469     s.target_is_new = !skip_create;
2470 
2471     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2472     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2473     if (ret < 0) {
2474         error_report("Invalid cache option: %s", cache);
2475         goto out;
2476     }
2477 
2478     if (skip_create) {
2479         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2480                             flags, writethrough, s.quiet, false);
2481     } else {
2482         /* TODO ultimately we should allow --target-image-opts
2483          * to be used even when -n is not given.
2484          * That has to wait for bdrv_create to be improved
2485          * to allow filenames in option syntax
2486          */
2487         s.target = img_open_file(out_filename, open_opts, out_fmt,
2488                                  flags, writethrough, s.quiet, false);
2489         open_opts = NULL; /* blk_new_open will have freed it */
2490     }
2491     if (!s.target) {
2492         ret = -1;
2493         goto out;
2494     }
2495     out_bs = blk_bs(s.target);
2496 
2497     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2498         error_report("Compression not supported for this file format");
2499         ret = -1;
2500         goto out;
2501     }
2502 
2503     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2504      * or discard_alignment of the out_bs is greater. Limit to
2505      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2506     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2507                         MAX(s.buf_sectors,
2508                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2509                                 out_bs->bl.pdiscard_alignment >>
2510                                 BDRV_SECTOR_BITS)));
2511 
2512     /* try to align the write requests to the destination to avoid unnecessary
2513      * RMW cycles. */
2514     s.alignment = MAX(pow2floor(s.min_sparse),
2515                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2516                                    BDRV_SECTOR_SIZE));
2517     assert(is_power_of_2(s.alignment));
2518 
2519     if (skip_create) {
2520         int64_t output_sectors = blk_nb_sectors(s.target);
2521         if (output_sectors < 0) {
2522             error_report("unable to get output image length: %s",
2523                          strerror(-output_sectors));
2524             ret = -1;
2525             goto out;
2526         } else if (output_sectors < s.total_sectors) {
2527             error_report("output file is smaller than input file");
2528             ret = -1;
2529             goto out;
2530         }
2531     }
2532 
2533     if (s.target_has_backing && s.target_is_new) {
2534         /* Errors are treated as "backing length unknown" (which means
2535          * s.target_backing_sectors has to be negative, which it will
2536          * be automatically).  The backing file length is used only
2537          * for optimizations, so such a case is not fatal. */
2538         s.target_backing_sectors = bdrv_nb_sectors(out_bs->backing->bs);
2539     } else {
2540         s.target_backing_sectors = -1;
2541     }
2542 
2543     ret = bdrv_get_info(out_bs, &bdi);
2544     if (ret < 0) {
2545         if (s.compressed) {
2546             error_report("could not get block driver info");
2547             goto out;
2548         }
2549     } else {
2550         s.compressed = s.compressed || bdi.needs_compressed_writes;
2551         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2552         s.unallocated_blocks_are_zero = bdi.unallocated_blocks_are_zero;
2553     }
2554 
2555     ret = convert_do_copy(&s);
2556 out:
2557     if (!ret) {
2558         qemu_progress_print(100, 0);
2559     }
2560     qemu_progress_end();
2561     qemu_opts_del(opts);
2562     qemu_opts_free(create_opts);
2563     qemu_opts_del(sn_opts);
2564     qobject_unref(open_opts);
2565     blk_unref(s.target);
2566     if (s.src) {
2567         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2568             blk_unref(s.src[bs_i]);
2569         }
2570         g_free(s.src);
2571     }
2572     g_free(s.src_sectors);
2573 fail_getopt:
2574     g_free(options);
2575 
2576     return !!ret;
2577 }
2578 
2579 
2580 static void dump_snapshots(BlockDriverState *bs)
2581 {
2582     QEMUSnapshotInfo *sn_tab, *sn;
2583     int nb_sns, i;
2584 
2585     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2586     if (nb_sns <= 0)
2587         return;
2588     printf("Snapshot list:\n");
2589     bdrv_snapshot_dump(NULL);
2590     printf("\n");
2591     for(i = 0; i < nb_sns; i++) {
2592         sn = &sn_tab[i];
2593         bdrv_snapshot_dump(sn);
2594         printf("\n");
2595     }
2596     g_free(sn_tab);
2597 }
2598 
2599 static void dump_json_image_info_list(ImageInfoList *list)
2600 {
2601     QString *str;
2602     QObject *obj;
2603     Visitor *v = qobject_output_visitor_new(&obj);
2604 
2605     visit_type_ImageInfoList(v, NULL, &list, &error_abort);
2606     visit_complete(v, &obj);
2607     str = qobject_to_json_pretty(obj);
2608     assert(str != NULL);
2609     printf("%s\n", qstring_get_str(str));
2610     qobject_unref(obj);
2611     visit_free(v);
2612     qobject_unref(str);
2613 }
2614 
2615 static void dump_json_image_info(ImageInfo *info)
2616 {
2617     QString *str;
2618     QObject *obj;
2619     Visitor *v = qobject_output_visitor_new(&obj);
2620 
2621     visit_type_ImageInfo(v, NULL, &info, &error_abort);
2622     visit_complete(v, &obj);
2623     str = qobject_to_json_pretty(obj);
2624     assert(str != NULL);
2625     printf("%s\n", qstring_get_str(str));
2626     qobject_unref(obj);
2627     visit_free(v);
2628     qobject_unref(str);
2629 }
2630 
2631 static void dump_human_image_info_list(ImageInfoList *list)
2632 {
2633     ImageInfoList *elem;
2634     bool delim = false;
2635 
2636     for (elem = list; elem; elem = elem->next) {
2637         if (delim) {
2638             printf("\n");
2639         }
2640         delim = true;
2641 
2642         bdrv_image_info_dump(elem->value);
2643     }
2644 }
2645 
2646 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2647 {
2648     return strcmp(a, b) == 0;
2649 }
2650 
2651 /**
2652  * Open an image file chain and return an ImageInfoList
2653  *
2654  * @filename: topmost image filename
2655  * @fmt: topmost image format (may be NULL to autodetect)
2656  * @chain: true  - enumerate entire backing file chain
2657  *         false - only topmost image file
2658  *
2659  * Returns a list of ImageInfo objects or NULL if there was an error opening an
2660  * image file.  If there was an error a message will have been printed to
2661  * stderr.
2662  */
2663 static ImageInfoList *collect_image_info_list(bool image_opts,
2664                                               const char *filename,
2665                                               const char *fmt,
2666                                               bool chain, bool force_share)
2667 {
2668     ImageInfoList *head = NULL;
2669     ImageInfoList **last = &head;
2670     GHashTable *filenames;
2671     Error *err = NULL;
2672 
2673     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2674 
2675     while (filename) {
2676         BlockBackend *blk;
2677         BlockDriverState *bs;
2678         ImageInfo *info;
2679         ImageInfoList *elem;
2680 
2681         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2682             error_report("Backing file '%s' creates an infinite loop.",
2683                          filename);
2684             goto err;
2685         }
2686         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2687 
2688         blk = img_open(image_opts, filename, fmt,
2689                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
2690                        force_share);
2691         if (!blk) {
2692             goto err;
2693         }
2694         bs = blk_bs(blk);
2695 
2696         bdrv_query_image_info(bs, &info, &err);
2697         if (err) {
2698             error_report_err(err);
2699             blk_unref(blk);
2700             goto err;
2701         }
2702 
2703         elem = g_new0(ImageInfoList, 1);
2704         elem->value = info;
2705         *last = elem;
2706         last = &elem->next;
2707 
2708         blk_unref(blk);
2709 
2710         /* Clear parameters that only apply to the topmost image */
2711         filename = fmt = NULL;
2712         image_opts = false;
2713 
2714         if (chain) {
2715             if (info->has_full_backing_filename) {
2716                 filename = info->full_backing_filename;
2717             } else if (info->has_backing_filename) {
2718                 error_report("Could not determine absolute backing filename,"
2719                              " but backing filename '%s' present",
2720                              info->backing_filename);
2721                 goto err;
2722             }
2723             if (info->has_backing_filename_format) {
2724                 fmt = info->backing_filename_format;
2725             }
2726         }
2727     }
2728     g_hash_table_destroy(filenames);
2729     return head;
2730 
2731 err:
2732     qapi_free_ImageInfoList(head);
2733     g_hash_table_destroy(filenames);
2734     return NULL;
2735 }
2736 
2737 static int img_info(int argc, char **argv)
2738 {
2739     int c;
2740     OutputFormat output_format = OFORMAT_HUMAN;
2741     bool chain = false;
2742     const char *filename, *fmt, *output;
2743     ImageInfoList *list;
2744     bool image_opts = false;
2745     bool force_share = false;
2746 
2747     fmt = NULL;
2748     output = NULL;
2749     for(;;) {
2750         int option_index = 0;
2751         static const struct option long_options[] = {
2752             {"help", no_argument, 0, 'h'},
2753             {"format", required_argument, 0, 'f'},
2754             {"output", required_argument, 0, OPTION_OUTPUT},
2755             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
2756             {"object", required_argument, 0, OPTION_OBJECT},
2757             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2758             {"force-share", no_argument, 0, 'U'},
2759             {0, 0, 0, 0}
2760         };
2761         c = getopt_long(argc, argv, ":f:hU",
2762                         long_options, &option_index);
2763         if (c == -1) {
2764             break;
2765         }
2766         switch(c) {
2767         case ':':
2768             missing_argument(argv[optind - 1]);
2769             break;
2770         case '?':
2771             unrecognized_option(argv[optind - 1]);
2772             break;
2773         case 'h':
2774             help();
2775             break;
2776         case 'f':
2777             fmt = optarg;
2778             break;
2779         case 'U':
2780             force_share = true;
2781             break;
2782         case OPTION_OUTPUT:
2783             output = optarg;
2784             break;
2785         case OPTION_BACKING_CHAIN:
2786             chain = true;
2787             break;
2788         case OPTION_OBJECT: {
2789             QemuOpts *opts;
2790             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2791                                            optarg, true);
2792             if (!opts) {
2793                 return 1;
2794             }
2795         }   break;
2796         case OPTION_IMAGE_OPTS:
2797             image_opts = true;
2798             break;
2799         }
2800     }
2801     if (optind != argc - 1) {
2802         error_exit("Expecting one image file name");
2803     }
2804     filename = argv[optind++];
2805 
2806     if (output && !strcmp(output, "json")) {
2807         output_format = OFORMAT_JSON;
2808     } else if (output && !strcmp(output, "human")) {
2809         output_format = OFORMAT_HUMAN;
2810     } else if (output) {
2811         error_report("--output must be used with human or json as argument.");
2812         return 1;
2813     }
2814 
2815     if (qemu_opts_foreach(&qemu_object_opts,
2816                           user_creatable_add_opts_foreach,
2817                           qemu_img_object_print_help, &error_fatal)) {
2818         return 1;
2819     }
2820 
2821     list = collect_image_info_list(image_opts, filename, fmt, chain,
2822                                    force_share);
2823     if (!list) {
2824         return 1;
2825     }
2826 
2827     switch (output_format) {
2828     case OFORMAT_HUMAN:
2829         dump_human_image_info_list(list);
2830         break;
2831     case OFORMAT_JSON:
2832         if (chain) {
2833             dump_json_image_info_list(list);
2834         } else {
2835             dump_json_image_info(list->value);
2836         }
2837         break;
2838     }
2839 
2840     qapi_free_ImageInfoList(list);
2841     return 0;
2842 }
2843 
2844 static int dump_map_entry(OutputFormat output_format, MapEntry *e,
2845                           MapEntry *next)
2846 {
2847     switch (output_format) {
2848     case OFORMAT_HUMAN:
2849         if (e->data && !e->has_offset) {
2850             error_report("File contains external, encrypted or compressed clusters.");
2851             return -1;
2852         }
2853         if (e->data && !e->zero) {
2854             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
2855                    e->start, e->length,
2856                    e->has_offset ? e->offset : 0,
2857                    e->has_filename ? e->filename : "");
2858         }
2859         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
2860          * Modify the flags here to allow more coalescing.
2861          */
2862         if (next && (!next->data || next->zero)) {
2863             next->data = false;
2864             next->zero = true;
2865         }
2866         break;
2867     case OFORMAT_JSON:
2868         printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
2869                " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
2870                (e->start == 0 ? "[" : ",\n"),
2871                e->start, e->length, e->depth,
2872                e->zero ? "true" : "false",
2873                e->data ? "true" : "false");
2874         if (e->has_offset) {
2875             printf(", \"offset\": %"PRId64"", e->offset);
2876         }
2877         putchar('}');
2878 
2879         if (!next) {
2880             printf("]\n");
2881         }
2882         break;
2883     }
2884     return 0;
2885 }
2886 
2887 static int get_block_status(BlockDriverState *bs, int64_t offset,
2888                             int64_t bytes, MapEntry *e)
2889 {
2890     int ret;
2891     int depth;
2892     BlockDriverState *file;
2893     bool has_offset;
2894     int64_t map;
2895     char *filename = NULL;
2896 
2897     /* As an optimization, we could cache the current range of unallocated
2898      * clusters in each file of the chain, and avoid querying the same
2899      * range repeatedly.
2900      */
2901 
2902     depth = 0;
2903     for (;;) {
2904         ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
2905         if (ret < 0) {
2906             return ret;
2907         }
2908         assert(bytes);
2909         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
2910             break;
2911         }
2912         bs = backing_bs(bs);
2913         if (bs == NULL) {
2914             ret = 0;
2915             break;
2916         }
2917 
2918         depth++;
2919     }
2920 
2921     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
2922 
2923     if (file && has_offset) {
2924         bdrv_refresh_filename(file);
2925         filename = file->filename;
2926     }
2927 
2928     *e = (MapEntry) {
2929         .start = offset,
2930         .length = bytes,
2931         .data = !!(ret & BDRV_BLOCK_DATA),
2932         .zero = !!(ret & BDRV_BLOCK_ZERO),
2933         .offset = map,
2934         .has_offset = has_offset,
2935         .depth = depth,
2936         .has_filename = filename,
2937         .filename = filename,
2938     };
2939 
2940     return 0;
2941 }
2942 
2943 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
2944 {
2945     if (curr->length == 0) {
2946         return false;
2947     }
2948     if (curr->zero != next->zero ||
2949         curr->data != next->data ||
2950         curr->depth != next->depth ||
2951         curr->has_filename != next->has_filename ||
2952         curr->has_offset != next->has_offset) {
2953         return false;
2954     }
2955     if (curr->has_filename && strcmp(curr->filename, next->filename)) {
2956         return false;
2957     }
2958     if (curr->has_offset && curr->offset + curr->length != next->offset) {
2959         return false;
2960     }
2961     return true;
2962 }
2963 
2964 static int img_map(int argc, char **argv)
2965 {
2966     int c;
2967     OutputFormat output_format = OFORMAT_HUMAN;
2968     BlockBackend *blk;
2969     BlockDriverState *bs;
2970     const char *filename, *fmt, *output;
2971     int64_t length;
2972     MapEntry curr = { .length = 0 }, next;
2973     int ret = 0;
2974     bool image_opts = false;
2975     bool force_share = false;
2976 
2977     fmt = NULL;
2978     output = NULL;
2979     for (;;) {
2980         int option_index = 0;
2981         static const struct option long_options[] = {
2982             {"help", no_argument, 0, 'h'},
2983             {"format", required_argument, 0, 'f'},
2984             {"output", required_argument, 0, OPTION_OUTPUT},
2985             {"object", required_argument, 0, OPTION_OBJECT},
2986             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2987             {"force-share", no_argument, 0, 'U'},
2988             {0, 0, 0, 0}
2989         };
2990         c = getopt_long(argc, argv, ":f:hU",
2991                         long_options, &option_index);
2992         if (c == -1) {
2993             break;
2994         }
2995         switch (c) {
2996         case ':':
2997             missing_argument(argv[optind - 1]);
2998             break;
2999         case '?':
3000             unrecognized_option(argv[optind - 1]);
3001             break;
3002         case 'h':
3003             help();
3004             break;
3005         case 'f':
3006             fmt = optarg;
3007             break;
3008         case 'U':
3009             force_share = true;
3010             break;
3011         case OPTION_OUTPUT:
3012             output = optarg;
3013             break;
3014         case OPTION_OBJECT: {
3015             QemuOpts *opts;
3016             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3017                                            optarg, true);
3018             if (!opts) {
3019                 return 1;
3020             }
3021         }   break;
3022         case OPTION_IMAGE_OPTS:
3023             image_opts = true;
3024             break;
3025         }
3026     }
3027     if (optind != argc - 1) {
3028         error_exit("Expecting one image file name");
3029     }
3030     filename = argv[optind];
3031 
3032     if (output && !strcmp(output, "json")) {
3033         output_format = OFORMAT_JSON;
3034     } else if (output && !strcmp(output, "human")) {
3035         output_format = OFORMAT_HUMAN;
3036     } else if (output) {
3037         error_report("--output must be used with human or json as argument.");
3038         return 1;
3039     }
3040 
3041     if (qemu_opts_foreach(&qemu_object_opts,
3042                           user_creatable_add_opts_foreach,
3043                           qemu_img_object_print_help, &error_fatal)) {
3044         return 1;
3045     }
3046 
3047     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3048     if (!blk) {
3049         return 1;
3050     }
3051     bs = blk_bs(blk);
3052 
3053     if (output_format == OFORMAT_HUMAN) {
3054         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3055     }
3056 
3057     length = blk_getlength(blk);
3058     while (curr.start + curr.length < length) {
3059         int64_t offset = curr.start + curr.length;
3060         int64_t n;
3061 
3062         /* Probe up to 1 GiB at a time.  */
3063         n = MIN(1 * GiB, length - offset);
3064         ret = get_block_status(bs, offset, n, &next);
3065 
3066         if (ret < 0) {
3067             error_report("Could not read file metadata: %s", strerror(-ret));
3068             goto out;
3069         }
3070 
3071         if (entry_mergeable(&curr, &next)) {
3072             curr.length += next.length;
3073             continue;
3074         }
3075 
3076         if (curr.length > 0) {
3077             ret = dump_map_entry(output_format, &curr, &next);
3078             if (ret < 0) {
3079                 goto out;
3080             }
3081         }
3082         curr = next;
3083     }
3084 
3085     ret = dump_map_entry(output_format, &curr, NULL);
3086 
3087 out:
3088     blk_unref(blk);
3089     return ret < 0;
3090 }
3091 
/*
 * Actions img_snapshot() can perform, selected by its -l, -c, -a and -d
 * options respectively.  img_snapshot() uses 0 for "no action chosen yet".
 */
#define SNAPSHOT_LIST   1
#define SNAPSHOT_CREATE 2
#define SNAPSHOT_APPLY  3
#define SNAPSHOT_DELETE 4
3096 
3097 static int img_snapshot(int argc, char **argv)
3098 {
3099     BlockBackend *blk;
3100     BlockDriverState *bs;
3101     QEMUSnapshotInfo sn;
3102     char *filename, *snapshot_name = NULL;
3103     int c, ret = 0, bdrv_oflags;
3104     int action = 0;
3105     qemu_timeval tv;
3106     bool quiet = false;
3107     Error *err = NULL;
3108     bool image_opts = false;
3109     bool force_share = false;
3110 
3111     bdrv_oflags = BDRV_O_RDWR;
3112     /* Parse commandline parameters */
3113     for(;;) {
3114         static const struct option long_options[] = {
3115             {"help", no_argument, 0, 'h'},
3116             {"object", required_argument, 0, OPTION_OBJECT},
3117             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3118             {"force-share", no_argument, 0, 'U'},
3119             {0, 0, 0, 0}
3120         };
3121         c = getopt_long(argc, argv, ":la:c:d:hqU",
3122                         long_options, NULL);
3123         if (c == -1) {
3124             break;
3125         }
3126         switch(c) {
3127         case ':':
3128             missing_argument(argv[optind - 1]);
3129             break;
3130         case '?':
3131             unrecognized_option(argv[optind - 1]);
3132             break;
3133         case 'h':
3134             help();
3135             return 0;
3136         case 'l':
3137             if (action) {
3138                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3139                 return 0;
3140             }
3141             action = SNAPSHOT_LIST;
3142             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
3143             break;
3144         case 'a':
3145             if (action) {
3146                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3147                 return 0;
3148             }
3149             action = SNAPSHOT_APPLY;
3150             snapshot_name = optarg;
3151             break;
3152         case 'c':
3153             if (action) {
3154                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3155                 return 0;
3156             }
3157             action = SNAPSHOT_CREATE;
3158             snapshot_name = optarg;
3159             break;
3160         case 'd':
3161             if (action) {
3162                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3163                 return 0;
3164             }
3165             action = SNAPSHOT_DELETE;
3166             snapshot_name = optarg;
3167             break;
3168         case 'q':
3169             quiet = true;
3170             break;
3171         case 'U':
3172             force_share = true;
3173             break;
3174         case OPTION_OBJECT: {
3175             QemuOpts *opts;
3176             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3177                                            optarg, true);
3178             if (!opts) {
3179                 return 1;
3180             }
3181         }   break;
3182         case OPTION_IMAGE_OPTS:
3183             image_opts = true;
3184             break;
3185         }
3186     }
3187 
3188     if (optind != argc - 1) {
3189         error_exit("Expecting one image file name");
3190     }
3191     filename = argv[optind++];
3192 
3193     if (qemu_opts_foreach(&qemu_object_opts,
3194                           user_creatable_add_opts_foreach,
3195                           qemu_img_object_print_help, &error_fatal)) {
3196         return 1;
3197     }
3198 
3199     /* Open the image */
3200     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
3201                    force_share);
3202     if (!blk) {
3203         return 1;
3204     }
3205     bs = blk_bs(blk);
3206 
3207     /* Perform the requested action */
3208     switch(action) {
3209     case SNAPSHOT_LIST:
3210         dump_snapshots(bs);
3211         break;
3212 
3213     case SNAPSHOT_CREATE:
3214         memset(&sn, 0, sizeof(sn));
3215         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3216 
3217         qemu_gettimeofday(&tv);
3218         sn.date_sec = tv.tv_sec;
3219         sn.date_nsec = tv.tv_usec * 1000;
3220 
3221         ret = bdrv_snapshot_create(bs, &sn);
3222         if (ret) {
3223             error_report("Could not create snapshot '%s': %d (%s)",
3224                 snapshot_name, ret, strerror(-ret));
3225         }
3226         break;
3227 
3228     case SNAPSHOT_APPLY:
3229         ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3230         if (ret) {
3231             error_reportf_err(err, "Could not apply snapshot '%s': ",
3232                               snapshot_name);
3233         }
3234         break;
3235 
3236     case SNAPSHOT_DELETE:
3237         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3238         if (ret < 0) {
3239             error_report("Could not delete snapshot '%s': snapshot not "
3240                          "found", snapshot_name);
3241             ret = 1;
3242         } else {
3243             ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3244             if (ret < 0) {
3245                 error_reportf_err(err, "Could not delete snapshot '%s': ",
3246                                   snapshot_name);
3247                 ret = 1;
3248             }
3249         }
3250         break;
3251     }
3252 
3253     /* Cleanup */
3254     blk_unref(blk);
3255     if (ret) {
3256         return 1;
3257     }
3258     return 0;
3259 }
3260 
3261 static int img_rebase(int argc, char **argv)
3262 {
3263     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3264     uint8_t *buf_old = NULL;
3265     uint8_t *buf_new = NULL;
3266     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3267     char *filename;
3268     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3269     int c, flags, src_flags, ret;
3270     bool writethrough, src_writethrough;
3271     int unsafe = 0;
3272     bool force_share = false;
3273     int progress = 0;
3274     bool quiet = false;
3275     Error *local_err = NULL;
3276     bool image_opts = false;
3277 
3278     /* Parse commandline parameters */
3279     fmt = NULL;
3280     cache = BDRV_DEFAULT_CACHE;
3281     src_cache = BDRV_DEFAULT_CACHE;
3282     out_baseimg = NULL;
3283     out_basefmt = NULL;
3284     for(;;) {
3285         static const struct option long_options[] = {
3286             {"help", no_argument, 0, 'h'},
3287             {"object", required_argument, 0, OPTION_OBJECT},
3288             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3289             {"force-share", no_argument, 0, 'U'},
3290             {0, 0, 0, 0}
3291         };
3292         c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
3293                         long_options, NULL);
3294         if (c == -1) {
3295             break;
3296         }
3297         switch(c) {
3298         case ':':
3299             missing_argument(argv[optind - 1]);
3300             break;
3301         case '?':
3302             unrecognized_option(argv[optind - 1]);
3303             break;
3304         case 'h':
3305             help();
3306             return 0;
3307         case 'f':
3308             fmt = optarg;
3309             break;
3310         case 'F':
3311             out_basefmt = optarg;
3312             break;
3313         case 'b':
3314             out_baseimg = optarg;
3315             break;
3316         case 'u':
3317             unsafe = 1;
3318             break;
3319         case 'p':
3320             progress = 1;
3321             break;
3322         case 't':
3323             cache = optarg;
3324             break;
3325         case 'T':
3326             src_cache = optarg;
3327             break;
3328         case 'q':
3329             quiet = true;
3330             break;
3331         case OPTION_OBJECT: {
3332             QemuOpts *opts;
3333             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3334                                            optarg, true);
3335             if (!opts) {
3336                 return 1;
3337             }
3338         }   break;
3339         case OPTION_IMAGE_OPTS:
3340             image_opts = true;
3341             break;
3342         case 'U':
3343             force_share = true;
3344             break;
3345         }
3346     }
3347 
3348     if (quiet) {
3349         progress = 0;
3350     }
3351 
3352     if (optind != argc - 1) {
3353         error_exit("Expecting one image file name");
3354     }
3355     if (!unsafe && !out_baseimg) {
3356         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3357     }
3358     filename = argv[optind++];
3359 
3360     if (qemu_opts_foreach(&qemu_object_opts,
3361                           user_creatable_add_opts_foreach,
3362                           qemu_img_object_print_help, &error_fatal)) {
3363         return 1;
3364     }
3365 
3366     qemu_progress_init(progress, 2.0);
3367     qemu_progress_print(0, 100);
3368 
3369     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3370     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3371     if (ret < 0) {
3372         error_report("Invalid cache option: %s", cache);
3373         goto out;
3374     }
3375 
3376     src_flags = 0;
3377     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3378     if (ret < 0) {
3379         error_report("Invalid source cache option: %s", src_cache);
3380         goto out;
3381     }
3382 
3383     /* The source files are opened read-only, don't care about WCE */
3384     assert((src_flags & BDRV_O_RDWR) == 0);
3385     (void) src_writethrough;
3386 
3387     /*
3388      * Open the images.
3389      *
3390      * Ignore the old backing file for unsafe rebase in case we want to correct
3391      * the reference to a renamed or moved backing file.
3392      */
3393     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3394                    false);
3395     if (!blk) {
3396         ret = -1;
3397         goto out;
3398     }
3399     bs = blk_bs(blk);
3400 
3401     if (out_basefmt != NULL) {
3402         if (bdrv_find_format(out_basefmt) == NULL) {
3403             error_report("Invalid format name: '%s'", out_basefmt);
3404             ret = -1;
3405             goto out;
3406         }
3407     }
3408 
3409     /* For safe rebasing we need to compare old and new backing file */
3410     if (!unsafe) {
3411         QDict *options = NULL;
3412         BlockDriverState *base_bs = backing_bs(bs);
3413 
3414         if (base_bs) {
3415             blk_old_backing = blk_new(qemu_get_aio_context(),
3416                                       BLK_PERM_CONSISTENT_READ,
3417                                       BLK_PERM_ALL);
3418             ret = blk_insert_bs(blk_old_backing, base_bs,
3419                                 &local_err);
3420             if (ret < 0) {
3421                 error_reportf_err(local_err,
3422                                   "Could not reuse old backing file '%s': ",
3423                                   base_bs->filename);
3424                 goto out;
3425             }
3426         } else {
3427             blk_old_backing = NULL;
3428         }
3429 
3430         if (out_baseimg[0]) {
3431             const char *overlay_filename;
3432             char *out_real_path;
3433 
3434             options = qdict_new();
3435             if (out_basefmt) {
3436                 qdict_put_str(options, "driver", out_basefmt);
3437             }
3438             if (force_share) {
3439                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3440             }
3441 
3442             bdrv_refresh_filename(bs);
3443             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3444                                                      : bs->filename;
3445             out_real_path =
3446                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
3447                                                              out_baseimg,
3448                                                              &local_err);
3449             if (local_err) {
3450                 qobject_unref(options);
3451                 error_reportf_err(local_err,
3452                                   "Could not resolve backing filename: ");
3453                 ret = -1;
3454                 goto out;
3455             }
3456 
3457             /*
3458              * Find out whether we rebase an image on top of a previous image
3459              * in its chain.
3460              */
3461             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3462             if (prefix_chain_bs) {
3463                 qobject_unref(options);
3464                 g_free(out_real_path);
3465 
3466                 blk_new_backing = blk_new(qemu_get_aio_context(),
3467                                           BLK_PERM_CONSISTENT_READ,
3468                                           BLK_PERM_ALL);
3469                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3470                                     &local_err);
3471                 if (ret < 0) {
3472                     error_reportf_err(local_err,
3473                                       "Could not reuse backing file '%s': ",
3474                                       out_baseimg);
3475                     goto out;
3476                 }
3477             } else {
3478                 blk_new_backing = blk_new_open(out_real_path, NULL,
3479                                                options, src_flags, &local_err);
3480                 g_free(out_real_path);
3481                 if (!blk_new_backing) {
3482                     error_reportf_err(local_err,
3483                                       "Could not open new backing file '%s': ",
3484                                       out_baseimg);
3485                     ret = -1;
3486                     goto out;
3487                 }
3488             }
3489         }
3490     }
3491 
3492     /*
3493      * Check each unallocated cluster in the COW file. If it is unallocated,
3494      * accesses go to the backing file. We must therefore compare this cluster
3495      * in the old and new backing file, and if they differ we need to copy it
3496      * from the old backing file into the COW file.
3497      *
3498      * If qemu-img crashes during this step, no harm is done. The content of
3499      * the image is the same as the original one at any time.
3500      */
3501     if (!unsafe) {
3502         int64_t size;
3503         int64_t old_backing_size = 0;
3504         int64_t new_backing_size = 0;
3505         uint64_t offset;
3506         int64_t n;
3507         float local_progress = 0;
3508 
3509         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3510         buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3511 
3512         size = blk_getlength(blk);
3513         if (size < 0) {
3514             error_report("Could not get size of '%s': %s",
3515                          filename, strerror(-size));
3516             ret = -1;
3517             goto out;
3518         }
3519         if (blk_old_backing) {
3520             old_backing_size = blk_getlength(blk_old_backing);
3521             if (old_backing_size < 0) {
3522                 char backing_name[PATH_MAX];
3523 
3524                 bdrv_get_backing_filename(bs, backing_name,
3525                                           sizeof(backing_name));
3526                 error_report("Could not get size of '%s': %s",
3527                              backing_name, strerror(-old_backing_size));
3528                 ret = -1;
3529                 goto out;
3530             }
3531         }
3532         if (blk_new_backing) {
3533             new_backing_size = blk_getlength(blk_new_backing);
3534             if (new_backing_size < 0) {
3535                 error_report("Could not get size of '%s': %s",
3536                              out_baseimg, strerror(-new_backing_size));
3537                 ret = -1;
3538                 goto out;
3539             }
3540         }
3541 
3542         if (size != 0) {
3543             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3544         }
3545 
3546         for (offset = 0; offset < size; offset += n) {
3547             bool buf_old_is_zero = false;
3548 
3549             /* How many bytes can we handle with the next read? */
3550             n = MIN(IO_BUF_SIZE, size - offset);
3551 
3552             /* If the cluster is allocated, we don't need to take action */
3553             ret = bdrv_is_allocated(bs, offset, n, &n);
3554             if (ret < 0) {
3555                 error_report("error while reading image metadata: %s",
3556                              strerror(-ret));
3557                 goto out;
3558             }
3559             if (ret) {
3560                 continue;
3561             }
3562 
3563             if (prefix_chain_bs) {
3564                 /*
3565                  * If cluster wasn't changed since prefix_chain, we don't need
3566                  * to take action
3567                  */
3568                 ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
3569                                               false, offset, n, &n);
3570                 if (ret < 0) {
3571                     error_report("error while reading image metadata: %s",
3572                                  strerror(-ret));
3573                     goto out;
3574                 }
3575                 if (!ret) {
3576                     continue;
3577                 }
3578             }
3579 
3580             /*
3581              * Read old and new backing file and take into consideration that
3582              * backing files may be smaller than the COW image.
3583              */
3584             if (offset >= old_backing_size) {
3585                 memset(buf_old, 0, n);
3586                 buf_old_is_zero = true;
3587             } else {
3588                 if (offset + n > old_backing_size) {
3589                     n = old_backing_size - offset;
3590                 }
3591 
3592                 ret = blk_pread(blk_old_backing, offset, buf_old, n);
3593                 if (ret < 0) {
3594                     error_report("error while reading from old backing file");
3595                     goto out;
3596                 }
3597             }
3598 
3599             if (offset >= new_backing_size || !blk_new_backing) {
3600                 memset(buf_new, 0, n);
3601             } else {
3602                 if (offset + n > new_backing_size) {
3603                     n = new_backing_size - offset;
3604                 }
3605 
3606                 ret = blk_pread(blk_new_backing, offset, buf_new, n);
3607                 if (ret < 0) {
3608                     error_report("error while reading from new backing file");
3609                     goto out;
3610                 }
3611             }
3612 
3613             /* If they differ, we need to write to the COW file */
3614             uint64_t written = 0;
3615 
3616             while (written < n) {
3617                 int64_t pnum;
3618 
3619                 if (compare_buffers(buf_old + written, buf_new + written,
3620                                     n - written, &pnum))
3621                 {
3622                     if (buf_old_is_zero) {
3623                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
3624                     } else {
3625                         ret = blk_pwrite(blk, offset + written,
3626                                          buf_old + written, pnum, 0);
3627                     }
3628                     if (ret < 0) {
3629                         error_report("Error while writing to COW image: %s",
3630                             strerror(-ret));
3631                         goto out;
3632                     }
3633                 }
3634 
3635                 written += pnum;
3636             }
3637             qemu_progress_print(local_progress, 100);
3638         }
3639     }
3640 
3641     /*
3642      * Change the backing file. All clusters that are different from the old
3643      * backing file are overwritten in the COW file now, so the visible content
3644      * doesn't change when we switch the backing file.
3645      */
3646     if (out_baseimg && *out_baseimg) {
3647         ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
3648     } else {
3649         ret = bdrv_change_backing_file(bs, NULL, NULL);
3650     }
3651 
3652     if (ret == -ENOSPC) {
3653         error_report("Could not change the backing file to '%s': No "
3654                      "space left in the file header", out_baseimg);
3655     } else if (ret < 0) {
3656         error_report("Could not change the backing file to '%s': %s",
3657             out_baseimg, strerror(-ret));
3658     }
3659 
3660     qemu_progress_print(100, 0);
3661     /*
3662      * TODO At this point it is possible to check if any clusters that are
3663      * allocated in the COW file are the same in the backing file. If so, they
3664      * could be dropped from the COW file. Don't do this before switching the
3665      * backing file, in case of a crash this would lead to corruption.
3666      */
3667 out:
3668     qemu_progress_end();
3669     /* Cleanup */
3670     if (!unsafe) {
3671         blk_unref(blk_old_backing);
3672         blk_unref(blk_new_backing);
3673     }
3674     qemu_vfree(buf_old);
3675     qemu_vfree(buf_new);
3676 
3677     blk_unref(blk);
3678     if (ret) {
3679         return 1;
3680     }
3681     return 0;
3682 }
3683 
3684 static int img_resize(int argc, char **argv)
3685 {
3686     Error *err = NULL;
3687     int c, ret, relative;
3688     const char *filename, *fmt, *size;
3689     int64_t n, total_size, current_size;
3690     bool quiet = false;
3691     BlockBackend *blk = NULL;
3692     PreallocMode prealloc = PREALLOC_MODE_OFF;
3693     QemuOpts *param;
3694 
3695     static QemuOptsList resize_options = {
3696         .name = "resize_options",
3697         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3698         .desc = {
3699             {
3700                 .name = BLOCK_OPT_SIZE,
3701                 .type = QEMU_OPT_SIZE,
3702                 .help = "Virtual disk size"
3703             }, {
3704                 /* end of list */
3705             }
3706         },
3707     };
3708     bool image_opts = false;
3709     bool shrink = false;
3710 
3711     /* Remove size from argv manually so that negative numbers are not treated
3712      * as options by getopt. */
3713     if (argc < 3) {
3714         error_exit("Not enough arguments");
3715         return 1;
3716     }
3717 
3718     size = argv[--argc];
3719 
3720     /* Parse getopt arguments */
3721     fmt = NULL;
3722     for(;;) {
3723         static const struct option long_options[] = {
3724             {"help", no_argument, 0, 'h'},
3725             {"object", required_argument, 0, OPTION_OBJECT},
3726             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3727             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
3728             {"shrink", no_argument, 0, OPTION_SHRINK},
3729             {0, 0, 0, 0}
3730         };
3731         c = getopt_long(argc, argv, ":f:hq",
3732                         long_options, NULL);
3733         if (c == -1) {
3734             break;
3735         }
3736         switch(c) {
3737         case ':':
3738             missing_argument(argv[optind - 1]);
3739             break;
3740         case '?':
3741             unrecognized_option(argv[optind - 1]);
3742             break;
3743         case 'h':
3744             help();
3745             break;
3746         case 'f':
3747             fmt = optarg;
3748             break;
3749         case 'q':
3750             quiet = true;
3751             break;
3752         case OPTION_OBJECT: {
3753             QemuOpts *opts;
3754             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3755                                            optarg, true);
3756             if (!opts) {
3757                 return 1;
3758             }
3759         }   break;
3760         case OPTION_IMAGE_OPTS:
3761             image_opts = true;
3762             break;
3763         case OPTION_PREALLOCATION:
3764             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
3765                                        PREALLOC_MODE__MAX, NULL);
3766             if (prealloc == PREALLOC_MODE__MAX) {
3767                 error_report("Invalid preallocation mode '%s'", optarg);
3768                 return 1;
3769             }
3770             break;
3771         case OPTION_SHRINK:
3772             shrink = true;
3773             break;
3774         }
3775     }
3776     if (optind != argc - 1) {
3777         error_exit("Expecting image file name and size");
3778     }
3779     filename = argv[optind++];
3780 
3781     if (qemu_opts_foreach(&qemu_object_opts,
3782                           user_creatable_add_opts_foreach,
3783                           qemu_img_object_print_help, &error_fatal)) {
3784         return 1;
3785     }
3786 
3787     /* Choose grow, shrink, or absolute resize mode */
3788     switch (size[0]) {
3789     case '+':
3790         relative = 1;
3791         size++;
3792         break;
3793     case '-':
3794         relative = -1;
3795         size++;
3796         break;
3797     default:
3798         relative = 0;
3799         break;
3800     }
3801 
3802     /* Parse size */
3803     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
3804     qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
3805     if (err) {
3806         error_report_err(err);
3807         ret = -1;
3808         qemu_opts_del(param);
3809         goto out;
3810     }
3811     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
3812     qemu_opts_del(param);
3813 
3814     blk = img_open(image_opts, filename, fmt,
3815                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
3816                    false);
3817     if (!blk) {
3818         ret = -1;
3819         goto out;
3820     }
3821 
3822     current_size = blk_getlength(blk);
3823     if (current_size < 0) {
3824         error_report("Failed to inquire current image length: %s",
3825                      strerror(-current_size));
3826         ret = -1;
3827         goto out;
3828     }
3829 
3830     if (relative) {
3831         total_size = current_size + n * relative;
3832     } else {
3833         total_size = n;
3834     }
3835     if (total_size <= 0) {
3836         error_report("New image size must be positive");
3837         ret = -1;
3838         goto out;
3839     }
3840 
3841     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
3842         error_report("Preallocation can only be used for growing images");
3843         ret = -1;
3844         goto out;
3845     }
3846 
3847     if (total_size < current_size && !shrink) {
3848         warn_report("Shrinking an image will delete all data beyond the "
3849                     "shrunken image's end. Before performing such an "
3850                     "operation, make sure there is no important data there.");
3851 
3852         if (g_strcmp0(bdrv_get_format_name(blk_bs(blk)), "raw") != 0) {
3853             error_report(
3854               "Use the --shrink option to perform a shrink operation.");
3855             ret = -1;
3856             goto out;
3857         } else {
3858             warn_report("Using the --shrink option will suppress this message. "
3859                         "Note that future versions of qemu-img may refuse to "
3860                         "shrink images without this option.");
3861         }
3862     }
3863 
3864     /*
3865      * The user expects the image to have the desired size after
3866      * resizing, so pass @exact=true.  It is of no use to report
3867      * success when the image has not actually been resized.
3868      */
3869     ret = blk_truncate(blk, total_size, true, prealloc, &err);
3870     if (!ret) {
3871         qprintf(quiet, "Image resized.\n");
3872     } else {
3873         error_report_err(err);
3874     }
3875 out:
3876     blk_unref(blk);
3877     if (ret) {
3878         return 1;
3879     }
3880     return 0;
3881 }
3882 
3883 static void amend_status_cb(BlockDriverState *bs,
3884                             int64_t offset, int64_t total_work_size,
3885                             void *opaque)
3886 {
3887     qemu_progress_print(100.f * offset / total_work_size, 0);
3888 }
3889 
3890 static int print_amend_option_help(const char *format)
3891 {
3892     BlockDriver *drv;
3893 
3894     /* Find driver and parse its options */
3895     drv = bdrv_find_format(format);
3896     if (!drv) {
3897         error_report("Unknown file format '%s'", format);
3898         return 1;
3899     }
3900 
3901     if (!drv->bdrv_amend_options) {
3902         error_report("Format driver '%s' does not support option amendment",
3903                      format);
3904         return 1;
3905     }
3906 
3907     /* Every driver supporting amendment must have create_opts */
3908     assert(drv->create_opts);
3909 
3910     printf("Creation options for '%s':\n", format);
3911     qemu_opts_print_help(drv->create_opts, false);
3912     printf("\nNote that not all of these options may be amendable.\n");
3913     return 0;
3914 }
3915 
3916 static int img_amend(int argc, char **argv)
3917 {
3918     Error *err = NULL;
3919     int c, ret = 0;
3920     char *options = NULL;
3921     QemuOptsList *create_opts = NULL;
3922     QemuOpts *opts = NULL;
3923     const char *fmt = NULL, *filename, *cache;
3924     int flags;
3925     bool writethrough;
3926     bool quiet = false, progress = false;
3927     BlockBackend *blk = NULL;
3928     BlockDriverState *bs = NULL;
3929     bool image_opts = false;
3930 
3931     cache = BDRV_DEFAULT_CACHE;
3932     for (;;) {
3933         static const struct option long_options[] = {
3934             {"help", no_argument, 0, 'h'},
3935             {"object", required_argument, 0, OPTION_OBJECT},
3936             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3937             {0, 0, 0, 0}
3938         };
3939         c = getopt_long(argc, argv, ":ho:f:t:pq",
3940                         long_options, NULL);
3941         if (c == -1) {
3942             break;
3943         }
3944 
3945         switch (c) {
3946         case ':':
3947             missing_argument(argv[optind - 1]);
3948             break;
3949         case '?':
3950             unrecognized_option(argv[optind - 1]);
3951             break;
3952         case 'h':
3953             help();
3954             break;
3955         case 'o':
3956             if (!is_valid_option_list(optarg)) {
3957                 error_report("Invalid option list: %s", optarg);
3958                 ret = -1;
3959                 goto out_no_progress;
3960             }
3961             if (!options) {
3962                 options = g_strdup(optarg);
3963             } else {
3964                 char *old_options = options;
3965                 options = g_strdup_printf("%s,%s", options, optarg);
3966                 g_free(old_options);
3967             }
3968             break;
3969         case 'f':
3970             fmt = optarg;
3971             break;
3972         case 't':
3973             cache = optarg;
3974             break;
3975         case 'p':
3976             progress = true;
3977             break;
3978         case 'q':
3979             quiet = true;
3980             break;
3981         case OPTION_OBJECT:
3982             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3983                                            optarg, true);
3984             if (!opts) {
3985                 ret = -1;
3986                 goto out_no_progress;
3987             }
3988             break;
3989         case OPTION_IMAGE_OPTS:
3990             image_opts = true;
3991             break;
3992         }
3993     }
3994 
3995     if (!options) {
3996         error_exit("Must specify options (-o)");
3997     }
3998 
3999     if (qemu_opts_foreach(&qemu_object_opts,
4000                           user_creatable_add_opts_foreach,
4001                           qemu_img_object_print_help, &error_fatal)) {
4002         ret = -1;
4003         goto out_no_progress;
4004     }
4005 
4006     if (quiet) {
4007         progress = false;
4008     }
4009     qemu_progress_init(progress, 1.0);
4010 
4011     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4012     if (fmt && has_help_option(options)) {
4013         /* If a format is explicitly specified (and possibly no filename is
4014          * given), print option help here */
4015         ret = print_amend_option_help(fmt);
4016         goto out;
4017     }
4018 
4019     if (optind != argc - 1) {
4020         error_report("Expecting one image file name");
4021         ret = -1;
4022         goto out;
4023     }
4024 
4025     flags = BDRV_O_RDWR;
4026     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4027     if (ret < 0) {
4028         error_report("Invalid cache option: %s", cache);
4029         goto out;
4030     }
4031 
4032     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4033                    false);
4034     if (!blk) {
4035         ret = -1;
4036         goto out;
4037     }
4038     bs = blk_bs(blk);
4039 
4040     fmt = bs->drv->format_name;
4041 
4042     if (has_help_option(options)) {
4043         /* If the format was auto-detected, print option help here */
4044         ret = print_amend_option_help(fmt);
4045         goto out;
4046     }
4047 
4048     if (!bs->drv->bdrv_amend_options) {
4049         error_report("Format driver '%s' does not support option amendment",
4050                      fmt);
4051         ret = -1;
4052         goto out;
4053     }
4054 
4055     /* Every driver supporting amendment must have create_opts */
4056     assert(bs->drv->create_opts);
4057 
4058     create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
4059     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4060     qemu_opts_do_parse(opts, options, NULL, &err);
4061     if (err) {
4062         error_report_err(err);
4063         ret = -1;
4064         goto out;
4065     }
4066 
4067     /* In case the driver does not call amend_status_cb() */
4068     qemu_progress_print(0.f, 0);
4069     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, &err);
4070     qemu_progress_print(100.f, 0);
4071     if (ret < 0) {
4072         error_report_err(err);
4073         goto out;
4074     }
4075 
4076 out:
4077     qemu_progress_end();
4078 
4079 out_no_progress:
4080     blk_unref(blk);
4081     qemu_opts_del(opts);
4082     qemu_opts_free(create_opts);
4083     g_free(options);
4084 
4085     if (ret) {
4086         return 1;
4087     }
4088     return 0;
4089 }
4090 
4091 typedef struct BenchData {
4092     BlockBackend *blk;
4093     uint64_t image_size;
4094     bool write;
4095     int bufsize;
4096     int step;
4097     int nrreq;
4098     int n;
4099     int flush_interval;
4100     bool drain_on_flush;
4101     uint8_t *buf;
4102     QEMUIOVector *qiov;
4103 
4104     int in_flight;
4105     bool in_flush;
4106     uint64_t offset;
4107 } BenchData;
4108 
/*
 * Completion callback for flushes that were issued without draining the
 * request queue first.  A failed flush ends the program; a successful one
 * needs no further action.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4116 
4117 static void bench_cb(void *opaque, int ret)
4118 {
4119     BenchData *b = opaque;
4120     BlockAIOCB *acb;
4121 
4122     if (ret < 0) {
4123         error_report("Failed request: %s", strerror(-ret));
4124         exit(EXIT_FAILURE);
4125     }
4126 
4127     if (b->in_flush) {
4128         /* Just finished a flush with drained queue: Start next requests */
4129         assert(b->in_flight == 0);
4130         b->in_flush = false;
4131     } else if (b->in_flight > 0) {
4132         int remaining = b->n - b->in_flight;
4133 
4134         b->n--;
4135         b->in_flight--;
4136 
4137         /* Time for flush? Drain queue if requested, then flush */
4138         if (b->flush_interval && remaining % b->flush_interval == 0) {
4139             if (!b->in_flight || !b->drain_on_flush) {
4140                 BlockCompletionFunc *cb;
4141 
4142                 if (b->drain_on_flush) {
4143                     b->in_flush = true;
4144                     cb = bench_cb;
4145                 } else {
4146                     cb = bench_undrained_flush_cb;
4147                 }
4148 
4149                 acb = blk_aio_flush(b->blk, cb, b);
4150                 if (!acb) {
4151                     error_report("Failed to issue flush request");
4152                     exit(EXIT_FAILURE);
4153                 }
4154             }
4155             if (b->drain_on_flush) {
4156                 return;
4157             }
4158         }
4159     }
4160 
4161     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4162         int64_t offset = b->offset;
4163         /* blk_aio_* might look for completed I/Os and kick bench_cb
4164          * again, so make sure this operation is counted by in_flight
4165          * and b->offset is ready for the next submission.
4166          */
4167         b->in_flight++;
4168         b->offset += b->step;
4169         b->offset %= b->image_size;
4170         if (b->write) {
4171             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4172         } else {
4173             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4174         }
4175         if (!acb) {
4176             error_report("Failed to issue request");
4177             exit(EXIT_FAILURE);
4178         }
4179     }
4180 }
4181 
4182 static int img_bench(int argc, char **argv)
4183 {
4184     int c, ret = 0;
4185     const char *fmt = NULL, *filename;
4186     bool quiet = false;
4187     bool image_opts = false;
4188     bool is_write = false;
4189     int count = 75000;
4190     int depth = 64;
4191     int64_t offset = 0;
4192     size_t bufsize = 4096;
4193     int pattern = 0;
4194     size_t step = 0;
4195     int flush_interval = 0;
4196     bool drain_on_flush = true;
4197     int64_t image_size;
4198     BlockBackend *blk = NULL;
4199     BenchData data = {};
4200     int flags = 0;
4201     bool writethrough = false;
4202     struct timeval t1, t2;
4203     int i;
4204     bool force_share = false;
4205     size_t buf_size;
4206 
4207     for (;;) {
4208         static const struct option long_options[] = {
4209             {"help", no_argument, 0, 'h'},
4210             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4211             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4212             {"pattern", required_argument, 0, OPTION_PATTERN},
4213             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4214             {"force-share", no_argument, 0, 'U'},
4215             {0, 0, 0, 0}
4216         };
4217         c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
4218                         NULL);
4219         if (c == -1) {
4220             break;
4221         }
4222 
4223         switch (c) {
4224         case ':':
4225             missing_argument(argv[optind - 1]);
4226             break;
4227         case '?':
4228             unrecognized_option(argv[optind - 1]);
4229             break;
4230         case 'h':
4231             help();
4232             break;
4233         case 'c':
4234         {
4235             unsigned long res;
4236 
4237             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4238                 error_report("Invalid request count specified");
4239                 return 1;
4240             }
4241             count = res;
4242             break;
4243         }
4244         case 'd':
4245         {
4246             unsigned long res;
4247 
4248             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4249                 error_report("Invalid queue depth specified");
4250                 return 1;
4251             }
4252             depth = res;
4253             break;
4254         }
4255         case 'f':
4256             fmt = optarg;
4257             break;
4258         case 'n':
4259             flags |= BDRV_O_NATIVE_AIO;
4260             break;
4261         case 'i':
4262             ret = bdrv_parse_aio(optarg, &flags);
4263             if (ret < 0) {
4264                 error_report("Invalid aio option: %s", optarg);
4265                 ret = -1;
4266                 goto out;
4267             }
4268             break;
4269         case 'o':
4270         {
4271             offset = cvtnum(optarg);
4272             if (offset < 0) {
4273                 error_report("Invalid offset specified");
4274                 return 1;
4275             }
4276             break;
4277         }
4278             break;
4279         case 'q':
4280             quiet = true;
4281             break;
4282         case 's':
4283         {
4284             int64_t sval;
4285 
4286             sval = cvtnum(optarg);
4287             if (sval < 0 || sval > INT_MAX) {
4288                 error_report("Invalid buffer size specified");
4289                 return 1;
4290             }
4291 
4292             bufsize = sval;
4293             break;
4294         }
4295         case 'S':
4296         {
4297             int64_t sval;
4298 
4299             sval = cvtnum(optarg);
4300             if (sval < 0 || sval > INT_MAX) {
4301                 error_report("Invalid step size specified");
4302                 return 1;
4303             }
4304 
4305             step = sval;
4306             break;
4307         }
4308         case 't':
4309             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4310             if (ret < 0) {
4311                 error_report("Invalid cache mode");
4312                 ret = -1;
4313                 goto out;
4314             }
4315             break;
4316         case 'w':
4317             flags |= BDRV_O_RDWR;
4318             is_write = true;
4319             break;
4320         case 'U':
4321             force_share = true;
4322             break;
4323         case OPTION_PATTERN:
4324         {
4325             unsigned long res;
4326 
4327             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4328                 error_report("Invalid pattern byte specified");
4329                 return 1;
4330             }
4331             pattern = res;
4332             break;
4333         }
4334         case OPTION_FLUSH_INTERVAL:
4335         {
4336             unsigned long res;
4337 
4338             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4339                 error_report("Invalid flush interval specified");
4340                 return 1;
4341             }
4342             flush_interval = res;
4343             break;
4344         }
4345         case OPTION_NO_DRAIN:
4346             drain_on_flush = false;
4347             break;
4348         case OPTION_IMAGE_OPTS:
4349             image_opts = true;
4350             break;
4351         }
4352     }
4353 
4354     if (optind != argc - 1) {
4355         error_exit("Expecting one image file name");
4356     }
4357     filename = argv[argc - 1];
4358 
4359     if (!is_write && flush_interval) {
4360         error_report("--flush-interval is only available in write tests");
4361         ret = -1;
4362         goto out;
4363     }
4364     if (flush_interval && flush_interval < depth) {
4365         error_report("Flush interval can't be smaller than depth");
4366         ret = -1;
4367         goto out;
4368     }
4369 
4370     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4371                    force_share);
4372     if (!blk) {
4373         ret = -1;
4374         goto out;
4375     }
4376 
4377     image_size = blk_getlength(blk);
4378     if (image_size < 0) {
4379         ret = image_size;
4380         goto out;
4381     }
4382 
4383     data = (BenchData) {
4384         .blk            = blk,
4385         .image_size     = image_size,
4386         .bufsize        = bufsize,
4387         .step           = step ?: bufsize,
4388         .nrreq          = depth,
4389         .n              = count,
4390         .offset         = offset,
4391         .write          = is_write,
4392         .flush_interval = flush_interval,
4393         .drain_on_flush = drain_on_flush,
4394     };
4395     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4396            "(starting at offset %" PRId64 ", step size %d)\n",
4397            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4398            data.offset, data.step);
4399     if (flush_interval) {
4400         printf("Sending flush every %d requests\n", flush_interval);
4401     }
4402 
4403     buf_size = data.nrreq * data.bufsize;
4404     data.buf = blk_blockalign(blk, buf_size);
4405     memset(data.buf, pattern, data.nrreq * data.bufsize);
4406 
4407     blk_register_buf(blk, data.buf, buf_size);
4408 
4409     data.qiov = g_new(QEMUIOVector, data.nrreq);
4410     for (i = 0; i < data.nrreq; i++) {
4411         qemu_iovec_init(&data.qiov[i], 1);
4412         qemu_iovec_add(&data.qiov[i],
4413                        data.buf + i * data.bufsize, data.bufsize);
4414     }
4415 
4416     gettimeofday(&t1, NULL);
4417     bench_cb(&data, 0);
4418 
4419     while (data.n > 0) {
4420         main_loop_wait(false);
4421     }
4422     gettimeofday(&t2, NULL);
4423 
4424     printf("Run completed in %3.3f seconds.\n",
4425            (t2.tv_sec - t1.tv_sec)
4426            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4427 
4428 out:
4429     if (data.buf) {
4430         blk_unregister_buf(blk, data.buf);
4431     }
4432     qemu_vfree(data.buf);
4433     blk_unref(blk);
4434 
4435     if (ret) {
4436         return 1;
4437     }
4438     return 0;
4439 }
4440 
/* Bits of DdInfo.flags: one per dd operand seen on the command line */
#define C_BS      0x01  /* bs=    */
#define C_COUNT   0x02  /* count= */
#define C_IF      0x04  /* if=    */
#define C_OF      0x08  /* of=    */
#define C_SKIP    0x10  /* skip=  */
4446 
/* Settings of a dd run that belong to neither the input nor the output */
struct DdInfo {
    unsigned int flags;     /* OR of the C_* bits of the operands given */
    int64_t count;          /* value of the count= operand */
};
4451 
/* One side (input or output) of a dd run */
struct DdIo {
    int bsz;                /* Block size */
    char *filename;         /* copy of the name given with if= / of= */
    uint8_t *buf;           /* data buffer; NOTE(review): set up outside this view */
    int64_t offset;         /* for the input, the value of skip= */
};
4458 
/* Table entry that binds a dd operand name to its parser */
struct DdOpts {
    /* Operand name, i.e. the text in front of '=' */
    const char *name;
    /*
     * Parser, called with the text after '=' plus the input, output and
     * general settings to update; returns 0 on success.
     */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    /* C_* bit recorded in DdInfo.flags once the operand was parsed */
    unsigned int flag;
};
4464 
4465 static int img_dd_bs(const char *arg,
4466                      struct DdIo *in, struct DdIo *out,
4467                      struct DdInfo *dd)
4468 {
4469     int64_t res;
4470 
4471     res = cvtnum(arg);
4472 
4473     if (res <= 0 || res > INT_MAX) {
4474         error_report("invalid number: '%s'", arg);
4475         return 1;
4476     }
4477     in->bsz = out->bsz = res;
4478 
4479     return 0;
4480 }
4481 
4482 static int img_dd_count(const char *arg,
4483                         struct DdIo *in, struct DdIo *out,
4484                         struct DdInfo *dd)
4485 {
4486     dd->count = cvtnum(arg);
4487 
4488     if (dd->count < 0) {
4489         error_report("invalid number: '%s'", arg);
4490         return 1;
4491     }
4492 
4493     return 0;
4494 }
4495 
4496 static int img_dd_if(const char *arg,
4497                      struct DdIo *in, struct DdIo *out,
4498                      struct DdInfo *dd)
4499 {
4500     in->filename = g_strdup(arg);
4501 
4502     return 0;
4503 }
4504 
4505 static int img_dd_of(const char *arg,
4506                      struct DdIo *in, struct DdIo *out,
4507                      struct DdInfo *dd)
4508 {
4509     out->filename = g_strdup(arg);
4510 
4511     return 0;
4512 }
4513 
4514 static int img_dd_skip(const char *arg,
4515                        struct DdIo *in, struct DdIo *out,
4516                        struct DdInfo *dd)
4517 {
4518     in->offset = cvtnum(arg);
4519 
4520     if (in->offset < 0) {
4521         error_report("invalid number: '%s'", arg);
4522         return 1;
4523     }
4524 
4525     return 0;
4526 }
4527 
4528 static int img_dd(int argc, char **argv)
4529 {
4530     int ret = 0;
4531     char *arg = NULL;
4532     char *tmp;
4533     BlockDriver *drv = NULL, *proto_drv = NULL;
4534     BlockBackend *blk1 = NULL, *blk2 = NULL;
4535     QemuOpts *opts = NULL;
4536     QemuOptsList *create_opts = NULL;
4537     Error *local_err = NULL;
4538     bool image_opts = false;
4539     int c, i;
4540     const char *out_fmt = "raw";
4541     const char *fmt = NULL;
4542     int64_t size = 0;
4543     int64_t block_count = 0, out_pos, in_pos;
4544     bool force_share = false;
4545     struct DdInfo dd = {
4546         .flags = 0,
4547         .count = 0,
4548     };
4549     struct DdIo in = {
4550         .bsz = 512, /* Block size is by default 512 bytes */
4551         .filename = NULL,
4552         .buf = NULL,
4553         .offset = 0
4554     };
4555     struct DdIo out = {
4556         .bsz = 512,
4557         .filename = NULL,
4558         .buf = NULL,
4559         .offset = 0
4560     };
4561 
4562     const struct DdOpts options[] = {
4563         { "bs", img_dd_bs, C_BS },
4564         { "count", img_dd_count, C_COUNT },
4565         { "if", img_dd_if, C_IF },
4566         { "of", img_dd_of, C_OF },
4567         { "skip", img_dd_skip, C_SKIP },
4568         { NULL, NULL, 0 }
4569     };
4570     const struct option long_options[] = {
4571         { "help", no_argument, 0, 'h'},
4572         { "object", required_argument, 0, OPTION_OBJECT},
4573         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4574         { "force-share", no_argument, 0, 'U'},
4575         { 0, 0, 0, 0 }
4576     };
4577 
4578     while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
4579         if (c == EOF) {
4580             break;
4581         }
4582         switch (c) {
4583         case 'O':
4584             out_fmt = optarg;
4585             break;
4586         case 'f':
4587             fmt = optarg;
4588             break;
4589         case ':':
4590             missing_argument(argv[optind - 1]);
4591             break;
4592         case '?':
4593             unrecognized_option(argv[optind - 1]);
4594             break;
4595         case 'h':
4596             help();
4597             break;
4598         case 'U':
4599             force_share = true;
4600             break;
4601         case OPTION_OBJECT:
4602             if (!qemu_opts_parse_noisily(&qemu_object_opts, optarg, true)) {
4603                 ret = -1;
4604                 goto out;
4605             }
4606             break;
4607         case OPTION_IMAGE_OPTS:
4608             image_opts = true;
4609             break;
4610         }
4611     }
4612 
4613     for (i = optind; i < argc; i++) {
4614         int j;
4615         arg = g_strdup(argv[i]);
4616 
4617         tmp = strchr(arg, '=');
4618         if (tmp == NULL) {
4619             error_report("unrecognized operand %s", arg);
4620             ret = -1;
4621             goto out;
4622         }
4623 
4624         *tmp++ = '\0';
4625 
4626         for (j = 0; options[j].name != NULL; j++) {
4627             if (!strcmp(arg, options[j].name)) {
4628                 break;
4629             }
4630         }
4631         if (options[j].name == NULL) {
4632             error_report("unrecognized operand %s", arg);
4633             ret = -1;
4634             goto out;
4635         }
4636 
4637         if (options[j].f(tmp, &in, &out, &dd) != 0) {
4638             ret = -1;
4639             goto out;
4640         }
4641         dd.flags |= options[j].flag;
4642         g_free(arg);
4643         arg = NULL;
4644     }
4645 
4646     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
4647         error_report("Must specify both input and output files");
4648         ret = -1;
4649         goto out;
4650     }
4651 
4652     if (qemu_opts_foreach(&qemu_object_opts,
4653                           user_creatable_add_opts_foreach,
4654                           qemu_img_object_print_help, &error_fatal)) {
4655         ret = -1;
4656         goto out;
4657     }
4658 
4659     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
4660                     force_share);
4661 
4662     if (!blk1) {
4663         ret = -1;
4664         goto out;
4665     }
4666 
4667     drv = bdrv_find_format(out_fmt);
4668     if (!drv) {
4669         error_report("Unknown file format");
4670         ret = -1;
4671         goto out;
4672     }
4673     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
4674 
4675     if (!proto_drv) {
4676         error_report_err(local_err);
4677         ret = -1;
4678         goto out;
4679     }
4680     if (!drv->create_opts) {
4681         error_report("Format driver '%s' does not support image creation",
4682                      drv->format_name);
4683         ret = -1;
4684         goto out;
4685     }
4686     if (!proto_drv->create_opts) {
4687         error_report("Protocol driver '%s' does not support image creation",
4688                      proto_drv->format_name);
4689         ret = -1;
4690         goto out;
4691     }
4692     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4693     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
4694 
4695     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4696 
4697     size = blk_getlength(blk1);
4698     if (size < 0) {
4699         error_report("Failed to get size for '%s'", in.filename);
4700         ret = -1;
4701         goto out;
4702     }
4703 
4704     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
4705         dd.count * in.bsz < size) {
4706         size = dd.count * in.bsz;
4707     }
4708 
4709     /* Overflow means the specified offset is beyond input image's size */
4710     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4711                               size < in.bsz * in.offset)) {
4712         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
4713     } else {
4714         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
4715                             size - in.bsz * in.offset, &error_abort);
4716     }
4717 
4718     ret = bdrv_create(drv, out.filename, opts, &local_err);
4719     if (ret < 0) {
4720         error_reportf_err(local_err,
4721                           "%s: error while creating output image: ",
4722                           out.filename);
4723         ret = -1;
4724         goto out;
4725     }
4726 
4727     /* TODO, we can't honour --image-opts for the target,
4728      * since it needs to be given in a format compatible
4729      * with the bdrv_create() call above which does not
4730      * support image-opts style.
4731      */
4732     blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
4733                          false, false, false);
4734 
4735     if (!blk2) {
4736         ret = -1;
4737         goto out;
4738     }
4739 
4740     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4741                               size < in.offset * in.bsz)) {
4742         /* We give a warning if the skip option is bigger than the input
4743          * size and create an empty output disk image (i.e. like dd(1)).
4744          */
4745         error_report("%s: cannot skip to specified offset", in.filename);
4746         in_pos = size;
4747     } else {
4748         in_pos = in.offset * in.bsz;
4749     }
4750 
4751     in.buf = g_new(uint8_t, in.bsz);
4752 
4753     for (out_pos = 0; in_pos < size; block_count++) {
4754         int in_ret, out_ret;
4755 
4756         if (in_pos + in.bsz > size) {
4757             in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
4758         } else {
4759             in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
4760         }
4761         if (in_ret < 0) {
4762             error_report("error while reading from input image file: %s",
4763                          strerror(-in_ret));
4764             ret = -1;
4765             goto out;
4766         }
4767         in_pos += in_ret;
4768 
4769         out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
4770 
4771         if (out_ret < 0) {
4772             error_report("error while writing to output image file: %s",
4773                          strerror(-out_ret));
4774             ret = -1;
4775             goto out;
4776         }
4777         out_pos += out_ret;
4778     }
4779 
4780 out:
4781     g_free(arg);
4782     qemu_opts_del(opts);
4783     qemu_opts_free(create_opts);
4784     blk_unref(blk1);
4785     blk_unref(blk2);
4786     g_free(in.filename);
4787     g_free(out.filename);
4788     g_free(in.buf);
4789     g_free(out.buf);
4790 
4791     if (ret) {
4792         return 1;
4793     }
4794     return 0;
4795 }
4796 
4797 static void dump_json_block_measure_info(BlockMeasureInfo *info)
4798 {
4799     QString *str;
4800     QObject *obj;
4801     Visitor *v = qobject_output_visitor_new(&obj);
4802 
4803     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
4804     visit_complete(v, &obj);
4805     str = qobject_to_json_pretty(obj);
4806     assert(str != NULL);
4807     printf("%s\n", qstring_get_str(str));
4808     qobject_unref(obj);
4809     visit_free(v);
4810     qobject_unref(str);
4811 }
4812 
4813 static int img_measure(int argc, char **argv)
4814 {
4815     static const struct option long_options[] = {
4816         {"help", no_argument, 0, 'h'},
4817         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4818         {"object", required_argument, 0, OPTION_OBJECT},
4819         {"output", required_argument, 0, OPTION_OUTPUT},
4820         {"size", required_argument, 0, OPTION_SIZE},
4821         {"force-share", no_argument, 0, 'U'},
4822         {0, 0, 0, 0}
4823     };
4824     OutputFormat output_format = OFORMAT_HUMAN;
4825     BlockBackend *in_blk = NULL;
4826     BlockDriver *drv;
4827     const char *filename = NULL;
4828     const char *fmt = NULL;
4829     const char *out_fmt = "raw";
4830     char *options = NULL;
4831     char *snapshot_name = NULL;
4832     bool force_share = false;
4833     QemuOpts *opts = NULL;
4834     QemuOpts *object_opts = NULL;
4835     QemuOpts *sn_opts = NULL;
4836     QemuOptsList *create_opts = NULL;
4837     bool image_opts = false;
4838     uint64_t img_size = UINT64_MAX;
4839     BlockMeasureInfo *info = NULL;
4840     Error *local_err = NULL;
4841     int ret = 1;
4842     int c;
4843 
4844     while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
4845                             long_options, NULL)) != -1) {
4846         switch (c) {
4847         case '?':
4848         case 'h':
4849             help();
4850             break;
4851         case 'f':
4852             fmt = optarg;
4853             break;
4854         case 'O':
4855             out_fmt = optarg;
4856             break;
4857         case 'o':
4858             if (!is_valid_option_list(optarg)) {
4859                 error_report("Invalid option list: %s", optarg);
4860                 goto out;
4861             }
4862             if (!options) {
4863                 options = g_strdup(optarg);
4864             } else {
4865                 char *old_options = options;
4866                 options = g_strdup_printf("%s,%s", options, optarg);
4867                 g_free(old_options);
4868             }
4869             break;
4870         case 'l':
4871             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
4872                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
4873                                                   optarg, false);
4874                 if (!sn_opts) {
4875                     error_report("Failed in parsing snapshot param '%s'",
4876                                  optarg);
4877                     goto out;
4878                 }
4879             } else {
4880                 snapshot_name = optarg;
4881             }
4882             break;
4883         case 'U':
4884             force_share = true;
4885             break;
4886         case OPTION_OBJECT:
4887             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
4888                                                   optarg, true);
4889             if (!object_opts) {
4890                 goto out;
4891             }
4892             break;
4893         case OPTION_IMAGE_OPTS:
4894             image_opts = true;
4895             break;
4896         case OPTION_OUTPUT:
4897             if (!strcmp(optarg, "json")) {
4898                 output_format = OFORMAT_JSON;
4899             } else if (!strcmp(optarg, "human")) {
4900                 output_format = OFORMAT_HUMAN;
4901             } else {
4902                 error_report("--output must be used with human or json "
4903                              "as argument.");
4904                 goto out;
4905             }
4906             break;
4907         case OPTION_SIZE:
4908         {
4909             int64_t sval;
4910 
4911             sval = cvtnum(optarg);
4912             if (sval < 0) {
4913                 if (sval == -ERANGE) {
4914                     error_report("Image size must be less than 8 EiB!");
4915                 } else {
4916                     error_report("Invalid image size specified! You may use "
4917                                  "k, M, G, T, P or E suffixes for ");
4918                     error_report("kilobytes, megabytes, gigabytes, terabytes, "
4919                                  "petabytes and exabytes.");
4920                 }
4921                 goto out;
4922             }
4923             img_size = (uint64_t)sval;
4924         }
4925         break;
4926         }
4927     }
4928 
4929     if (qemu_opts_foreach(&qemu_object_opts,
4930                           user_creatable_add_opts_foreach,
4931                           qemu_img_object_print_help, &error_fatal)) {
4932         goto out;
4933     }
4934 
4935     if (argc - optind > 1) {
4936         error_report("At most one filename argument is allowed.");
4937         goto out;
4938     } else if (argc - optind == 1) {
4939         filename = argv[optind];
4940     }
4941 
4942     if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
4943         error_report("--image-opts, -f, and -l require a filename argument.");
4944         goto out;
4945     }
4946     if (filename && img_size != UINT64_MAX) {
4947         error_report("--size N cannot be used together with a filename.");
4948         goto out;
4949     }
4950     if (!filename && img_size == UINT64_MAX) {
4951         error_report("Either --size N or one filename must be specified.");
4952         goto out;
4953     }
4954 
4955     if (filename) {
4956         in_blk = img_open(image_opts, filename, fmt, 0,
4957                           false, false, force_share);
4958         if (!in_blk) {
4959             goto out;
4960         }
4961 
4962         if (sn_opts) {
4963             bdrv_snapshot_load_tmp(blk_bs(in_blk),
4964                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
4965                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
4966                     &local_err);
4967         } else if (snapshot_name != NULL) {
4968             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
4969                     snapshot_name, &local_err);
4970         }
4971         if (local_err) {
4972             error_reportf_err(local_err, "Failed to load snapshot: ");
4973             goto out;
4974         }
4975     }
4976 
4977     drv = bdrv_find_format(out_fmt);
4978     if (!drv) {
4979         error_report("Unknown file format '%s'", out_fmt);
4980         goto out;
4981     }
4982     if (!drv->create_opts) {
4983         error_report("Format driver '%s' does not support image creation",
4984                      drv->format_name);
4985         goto out;
4986     }
4987 
4988     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4989     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
4990     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4991     if (options) {
4992         qemu_opts_do_parse(opts, options, NULL, &local_err);
4993         if (local_err) {
4994             error_report_err(local_err);
4995             error_report("Invalid options for file format '%s'", out_fmt);
4996             goto out;
4997         }
4998     }
4999     if (img_size != UINT64_MAX) {
5000         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5001     }
5002 
5003     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
5004     if (local_err) {
5005         error_report_err(local_err);
5006         goto out;
5007     }
5008 
5009     if (output_format == OFORMAT_HUMAN) {
5010         printf("required size: %" PRIu64 "\n", info->required);
5011         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5012     } else {
5013         dump_json_block_measure_info(info);
5014     }
5015 
5016     ret = 0;
5017 
5018 out:
5019     qapi_free_BlockMeasureInfo(info);
5020     qemu_opts_del(object_opts);
5021     qemu_opts_del(opts);
5022     qemu_opts_del(sn_opts);
5023     qemu_opts_free(create_opts);
5024     g_free(options);
5025     blk_unref(in_blk);
5026     return ret;
5027 }
5028 
5029 static const img_cmd_t img_cmds[] = {
5030 #define DEF(option, callback, arg_string)        \
5031     { option, callback },
5032 #include "qemu-img-cmds.h"
5033 #undef DEF
5034     { NULL, NULL, },
5035 };
5036 
5037 int main(int argc, char **argv)
5038 {
5039     const img_cmd_t *cmd;
5040     const char *cmdname;
5041     Error *local_error = NULL;
5042     char *trace_file = NULL;
5043     int c;
5044     static const struct option long_options[] = {
5045         {"help", no_argument, 0, 'h'},
5046         {"version", no_argument, 0, 'V'},
5047         {"trace", required_argument, NULL, 'T'},
5048         {0, 0, 0, 0}
5049     };
5050 
5051 #ifdef CONFIG_POSIX
5052     signal(SIGPIPE, SIG_IGN);
5053 #endif
5054 
5055     error_init(argv[0]);
5056     module_call_init(MODULE_INIT_TRACE);
5057     qemu_init_exec_dir(argv[0]);
5058 
5059     if (qemu_init_main_loop(&local_error)) {
5060         error_report_err(local_error);
5061         exit(EXIT_FAILURE);
5062     }
5063 
5064     qcrypto_init(&error_fatal);
5065 
5066     module_call_init(MODULE_INIT_QOM);
5067     bdrv_init();
5068     if (argc < 2) {
5069         error_exit("Not enough arguments");
5070     }
5071 
5072     qemu_add_opts(&qemu_object_opts);
5073     qemu_add_opts(&qemu_source_opts);
5074     qemu_add_opts(&qemu_trace_opts);
5075 
5076     while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5077         switch (c) {
5078         case ':':
5079             missing_argument(argv[optind - 1]);
5080             return 0;
5081         case '?':
5082             unrecognized_option(argv[optind - 1]);
5083             return 0;
5084         case 'h':
5085             help();
5086             return 0;
5087         case 'V':
5088             printf(QEMU_IMG_VERSION);
5089             return 0;
5090         case 'T':
5091             g_free(trace_file);
5092             trace_file = trace_opt_parse(optarg);
5093             break;
5094         }
5095     }
5096 
5097     cmdname = argv[optind];
5098 
5099     /* reset getopt_long scanning */
5100     argc -= optind;
5101     if (argc < 1) {
5102         return 0;
5103     }
5104     argv += optind;
5105     qemu_reset_optind();
5106 
5107     if (!trace_init_backends()) {
5108         exit(1);
5109     }
5110     trace_init_file(trace_file);
5111     qemu_set_log(LOG_TRACE);
5112 
5113     /* find the command */
5114     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5115         if (!strcmp(cmdname, cmd->name)) {
5116             return cmd->handler(argc, argv);
5117         }
5118     }
5119 
5120     /* not found */
5121     error_exit("Command not found: %s", cmdname);
5122 }
5123