xref: /openbmc/qemu/qemu-img.c (revision 88cd34ee)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu-common.h"
29 #include "qemu-version.h"
30 #include "qapi/error.h"
31 #include "qapi/qapi-visit-block-core.h"
32 #include "qapi/qobject-output-visitor.h"
33 #include "qapi/qmp/qjson.h"
34 #include "qapi/qmp/qdict.h"
35 #include "qapi/qmp/qstring.h"
36 #include "qemu/cutils.h"
37 #include "qemu/config-file.h"
38 #include "qemu/option.h"
39 #include "qemu/error-report.h"
40 #include "qemu/log.h"
41 #include "qemu/main-loop.h"
42 #include "qemu/module.h"
43 #include "qemu/units.h"
44 #include "qom/object_interfaces.h"
45 #include "sysemu/block-backend.h"
46 #include "block/block_int.h"
47 #include "block/blockjob.h"
48 #include "block/qapi.h"
49 #include "crypto/init.h"
50 #include "trace/control.h"
51 
52 #define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
53                           "\n" QEMU_COPYRIGHT "\n"
54 
/* Associates a command name with the function that handles it. */
typedef struct img_cmd_t {
    /* Name of the command */
    const char *name;
    /* Handler invoked with an argc/argv pair; returns an int status */
    int (*handler)(int argc, char **argv);
} img_cmd_t;
59 
/*
 * getopt_long() return codes for long options that have no single-character
 * equivalent.  Numbering starts at 256 so the codes cannot collide with any
 * option character; each following enumerator is one more than the previous.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN,       /* 257 */
    OPTION_OBJECT,              /* 258 */
    OPTION_IMAGE_OPTS,          /* 259 */
    OPTION_PATTERN,             /* 260 */
    OPTION_FLUSH_INTERVAL,      /* 261 */
    OPTION_NO_DRAIN,            /* 262 */
    OPTION_TARGET_IMAGE_OPTS,   /* 263 */
    OPTION_SIZE,                /* 264 */
    OPTION_PREALLOCATION,       /* 265 */
    OPTION_SHRINK,              /* 266 */
    OPTION_SALVAGE,             /* 267 */
    OPTION_TARGET_IS_ZERO,      /* 268 */
};
75 
/* Output style selected with --output ("json" or "human") */
typedef enum OutputFormat {
    OFORMAT_JSON = 0,
    OFORMAT_HUMAN = 1,
} OutputFormat;
80 
/*
 * Cache mode used when none is given on the command line.  qemu-img defaults
 * to cache=writeback because data integrity is not important for it.
 */
#define BDRV_DEFAULT_CACHE "writeback"
83 
/*
 * Callback handed to bdrv_iterate_format(): writes one format name to
 * stdout, preceded by a single space.  @opaque is not used.
 */
static void format_print(void *opaque, const char *name)
{
    (void)opaque;
    fprintf(stdout, " %s", name);
}
88 
89 static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
90 {
91     va_list ap;
92 
93     va_start(ap, fmt);
94     error_vreport(fmt, ap);
95     va_end(ap);
96 
97     error_printf("Try 'qemu-img --help' for more information\n");
98     exit(EXIT_FAILURE);
99 }
100 
101 static void QEMU_NORETURN missing_argument(const char *option)
102 {
103     error_exit("missing argument for option '%s'", option);
104 }
105 
106 static void QEMU_NORETURN unrecognized_option(const char *option)
107 {
108     error_exit("unrecognized option '%s'", option);
109 }
110 
111 /* Please keep in synch with qemu-img.texi */
112 static void QEMU_NORETURN help(void)
113 {
114     const char *help_msg =
115            QEMU_IMG_VERSION
116            "usage: qemu-img [standard options] command [command options]\n"
117            "QEMU disk image utility\n"
118            "\n"
119            "    '-h', '--help'       display this help and exit\n"
120            "    '-V', '--version'    output version information and exit\n"
121            "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
122            "                         specify tracing options\n"
123            "\n"
124            "Command syntax:\n"
125 #define DEF(option, callback, arg_string)        \
126            "  " arg_string "\n"
127 #include "qemu-img-cmds.h"
128 #undef DEF
129            "\n"
130            "Command parameters:\n"
131            "  'filename' is a disk image filename\n"
132            "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
133            "    manual page for a description of the object properties. The most common\n"
134            "    object type is a 'secret', which is used to supply passwords and/or\n"
135            "    encryption keys.\n"
136            "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
137            "  'cache' is the cache mode used to write the output disk image, the valid\n"
138            "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
139            "    'directsync' and 'unsafe' (default for convert)\n"
140            "  'src_cache' is the cache mode used to read input disk images, the valid\n"
141            "    options are the same as for the 'cache' option\n"
142            "  'size' is the disk image size in bytes. Optional suffixes\n"
143            "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
144            "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
145            "    supported. 'b' is ignored.\n"
146            "  'output_filename' is the destination disk image filename\n"
147            "  'output_fmt' is the destination format\n"
148            "  'options' is a comma separated list of format specific options in a\n"
149            "    name=value format. Use -o ? for an overview of the options supported by the\n"
150            "    used format\n"
151            "  'snapshot_param' is param used for internal snapshot, format\n"
152            "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
153            "    '[ID_OR_NAME]'\n"
154            "  '-c' indicates that target image must be compressed (qcow format only)\n"
155            "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
156            "       new backing file match exactly. The image doesn't need a working\n"
157            "       backing file before rebasing in this case (useful for renaming the\n"
158            "       backing file). For image creation, allow creating without attempting\n"
159            "       to open the backing file.\n"
160            "  '-h' with or without a command shows this help and lists the supported formats\n"
161            "  '-p' show progress of command (only certain commands)\n"
162            "  '-q' use Quiet mode - do not print any output (except errors)\n"
163            "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
164            "       contain only zeros for qemu-img to create a sparse image during\n"
165            "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
166            "       unallocated or zero sectors, and the destination image will always be\n"
167            "       fully allocated\n"
168            "  '--output' takes the format in which the output must be done (human or json)\n"
169            "  '-n' skips the target volume creation (useful if the volume is created\n"
170            "       prior to running qemu-img)\n"
171            "\n"
172            "Parameters to check subcommand:\n"
173            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
174            "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
175            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
176            "       hiding corruption that has already occurred.\n"
177            "\n"
178            "Parameters to convert subcommand:\n"
179            "  '-m' specifies how many coroutines work in parallel during the convert\n"
180            "       process (defaults to 8)\n"
181            "  '-W' allow to write to the target out of order rather than sequential\n"
182            "\n"
183            "Parameters to snapshot subcommand:\n"
184            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
185            "  '-a' applies a snapshot (revert disk to saved state)\n"
186            "  '-c' creates a snapshot\n"
187            "  '-d' deletes a snapshot\n"
188            "  '-l' lists all snapshots in the given image\n"
189            "\n"
190            "Parameters to compare subcommand:\n"
191            "  '-f' first image format\n"
192            "  '-F' second image format\n"
193            "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
194            "\n"
195            "Parameters to dd subcommand:\n"
196            "  'bs=BYTES' read and write up to BYTES bytes at a time "
197            "(default: 512)\n"
198            "  'count=N' copy only N input blocks\n"
199            "  'if=FILE' read from FILE\n"
200            "  'of=FILE' write to FILE\n"
201            "  'skip=N' skip N bs-sized blocks at the start of input\n";
202 
203     printf("%s\nSupported formats:", help_msg);
204     bdrv_iterate_format(format_print, NULL, false);
205     printf("\n\n" QEMU_HELP_BOTTOM "\n");
206     exit(EXIT_SUCCESS);
207 }
208 
209 static QemuOptsList qemu_object_opts = {
210     .name = "object",
211     .implied_opt_name = "qom-type",
212     .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
213     .desc = {
214         { }
215     },
216 };
217 
218 static bool qemu_img_object_print_help(const char *type, QemuOpts *opts)
219 {
220     if (user_creatable_print_help(type, opts)) {
221         exit(0);
222     }
223     return true;
224 }
225 
226 static QemuOptsList qemu_source_opts = {
227     .name = "source",
228     .implied_opt_name = "file",
229     .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
230     .desc = {
231         { }
232     },
233 };
234 
235 static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
236 {
237     int ret = 0;
238     if (!quiet) {
239         va_list args;
240         va_start(args, fmt);
241         ret = vprintf(fmt, args);
242         va_end(args);
243     }
244     return ret;
245 }
246 
247 
248 static int print_block_option_help(const char *filename, const char *fmt)
249 {
250     BlockDriver *drv, *proto_drv;
251     QemuOptsList *create_opts = NULL;
252     Error *local_err = NULL;
253 
254     /* Find driver and parse its options */
255     drv = bdrv_find_format(fmt);
256     if (!drv) {
257         error_report("Unknown file format '%s'", fmt);
258         return 1;
259     }
260 
261     if (!drv->create_opts) {
262         error_report("Format driver '%s' does not support image creation", fmt);
263         return 1;
264     }
265 
266     create_opts = qemu_opts_append(create_opts, drv->create_opts);
267     if (filename) {
268         proto_drv = bdrv_find_protocol(filename, true, &local_err);
269         if (!proto_drv) {
270             error_report_err(local_err);
271             qemu_opts_free(create_opts);
272             return 1;
273         }
274         if (!proto_drv->create_opts) {
275             error_report("Protocol driver '%s' does not support image creation",
276                          proto_drv->format_name);
277             qemu_opts_free(create_opts);
278             return 1;
279         }
280         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
281     }
282 
283     if (filename) {
284         printf("Supported options:\n");
285     } else {
286         printf("Supported %s options:\n", fmt);
287     }
288     qemu_opts_print_help(create_opts, false);
289     qemu_opts_free(create_opts);
290 
291     if (!filename) {
292         printf("\n"
293                "The protocol level may support further options.\n"
294                "Specify the target filename to include those options.\n");
295     }
296 
297     return 0;
298 }
299 
300 
301 static BlockBackend *img_open_opts(const char *optstr,
302                                    QemuOpts *opts, int flags, bool writethrough,
303                                    bool quiet, bool force_share)
304 {
305     QDict *options;
306     Error *local_err = NULL;
307     BlockBackend *blk;
308     options = qemu_opts_to_qdict(opts, NULL);
309     if (force_share) {
310         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
311             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
312             error_report("--force-share/-U conflicts with image options");
313             qobject_unref(options);
314             return NULL;
315         }
316         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
317     }
318     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
319     if (!blk) {
320         error_reportf_err(local_err, "Could not open '%s': ", optstr);
321         return NULL;
322     }
323     blk_set_enable_write_cache(blk, !writethrough);
324 
325     return blk;
326 }
327 
328 static BlockBackend *img_open_file(const char *filename,
329                                    QDict *options,
330                                    const char *fmt, int flags,
331                                    bool writethrough, bool quiet,
332                                    bool force_share)
333 {
334     BlockBackend *blk;
335     Error *local_err = NULL;
336 
337     if (!options) {
338         options = qdict_new();
339     }
340     if (fmt) {
341         qdict_put_str(options, "driver", fmt);
342     }
343 
344     if (force_share) {
345         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
346     }
347     blk = blk_new_open(filename, NULL, options, flags, &local_err);
348     if (!blk) {
349         error_reportf_err(local_err, "Could not open '%s': ", filename);
350         return NULL;
351     }
352     blk_set_enable_write_cache(blk, !writethrough);
353 
354     return blk;
355 }
356 
357 
358 static int img_add_key_secrets(void *opaque,
359                                const char *name, const char *value,
360                                Error **errp)
361 {
362     QDict *options = opaque;
363 
364     if (g_str_has_suffix(name, "key-secret")) {
365         qdict_put_str(options, name, value);
366     }
367 
368     return 0;
369 }
370 
371 
372 static BlockBackend *img_open(bool image_opts,
373                               const char *filename,
374                               const char *fmt, int flags, bool writethrough,
375                               bool quiet, bool force_share)
376 {
377     BlockBackend *blk;
378     if (image_opts) {
379         QemuOpts *opts;
380         if (fmt) {
381             error_report("--image-opts and --format are mutually exclusive");
382             return NULL;
383         }
384         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
385                                        filename, true);
386         if (!opts) {
387             return NULL;
388         }
389         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
390                             force_share);
391     } else {
392         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
393                             force_share);
394     }
395     return blk;
396 }
397 
398 
399 static int add_old_style_options(const char *fmt, QemuOpts *opts,
400                                  const char *base_filename,
401                                  const char *base_fmt)
402 {
403     Error *err = NULL;
404 
405     if (base_filename) {
406         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
407         if (err) {
408             error_report("Backing file not supported for file format '%s'",
409                          fmt);
410             error_free(err);
411             return -1;
412         }
413     }
414     if (base_fmt) {
415         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
416         if (err) {
417             error_report("Backing file format not supported for file "
418                          "format '%s'", fmt);
419             error_free(err);
420             return -1;
421         }
422     }
423     return 0;
424 }
425 
426 static int64_t cvtnum(const char *s)
427 {
428     int err;
429     uint64_t value;
430 
431     err = qemu_strtosz(s, NULL, &value);
432     if (err < 0) {
433         return err;
434     }
435     if (value > INT64_MAX) {
436         return -ERANGE;
437     }
438     return value;
439 }
440 
441 static int img_create(int argc, char **argv)
442 {
443     int c;
444     uint64_t img_size = -1;
445     const char *fmt = "raw";
446     const char *base_fmt = NULL;
447     const char *filename;
448     const char *base_filename = NULL;
449     char *options = NULL;
450     Error *local_err = NULL;
451     bool quiet = false;
452     int flags = 0;
453 
454     for(;;) {
455         static const struct option long_options[] = {
456             {"help", no_argument, 0, 'h'},
457             {"object", required_argument, 0, OPTION_OBJECT},
458             {0, 0, 0, 0}
459         };
460         c = getopt_long(argc, argv, ":F:b:f:ho:qu",
461                         long_options, NULL);
462         if (c == -1) {
463             break;
464         }
465         switch(c) {
466         case ':':
467             missing_argument(argv[optind - 1]);
468             break;
469         case '?':
470             unrecognized_option(argv[optind - 1]);
471             break;
472         case 'h':
473             help();
474             break;
475         case 'F':
476             base_fmt = optarg;
477             break;
478         case 'b':
479             base_filename = optarg;
480             break;
481         case 'f':
482             fmt = optarg;
483             break;
484         case 'o':
485             if (!is_valid_option_list(optarg)) {
486                 error_report("Invalid option list: %s", optarg);
487                 goto fail;
488             }
489             if (!options) {
490                 options = g_strdup(optarg);
491             } else {
492                 char *old_options = options;
493                 options = g_strdup_printf("%s,%s", options, optarg);
494                 g_free(old_options);
495             }
496             break;
497         case 'q':
498             quiet = true;
499             break;
500         case 'u':
501             flags |= BDRV_O_NO_BACKING;
502             break;
503         case OPTION_OBJECT: {
504             QemuOpts *opts;
505             opts = qemu_opts_parse_noisily(&qemu_object_opts,
506                                            optarg, true);
507             if (!opts) {
508                 goto fail;
509             }
510         }   break;
511         }
512     }
513 
514     /* Get the filename */
515     filename = (optind < argc) ? argv[optind] : NULL;
516     if (options && has_help_option(options)) {
517         g_free(options);
518         return print_block_option_help(filename, fmt);
519     }
520 
521     if (optind >= argc) {
522         error_exit("Expecting image file name");
523     }
524     optind++;
525 
526     if (qemu_opts_foreach(&qemu_object_opts,
527                           user_creatable_add_opts_foreach,
528                           qemu_img_object_print_help, &error_fatal)) {
529         goto fail;
530     }
531 
532     /* Get image size, if specified */
533     if (optind < argc) {
534         int64_t sval;
535 
536         sval = cvtnum(argv[optind++]);
537         if (sval < 0) {
538             if (sval == -ERANGE) {
539                 error_report("Image size must be less than 8 EiB!");
540             } else {
541                 error_report("Invalid image size specified! You may use k, M, "
542                       "G, T, P or E suffixes for ");
543                 error_report("kilobytes, megabytes, gigabytes, terabytes, "
544                              "petabytes and exabytes.");
545             }
546             goto fail;
547         }
548         img_size = (uint64_t)sval;
549     }
550     if (optind != argc) {
551         error_exit("Unexpected argument: %s", argv[optind]);
552     }
553 
554     bdrv_img_create(filename, fmt, base_filename, base_fmt,
555                     options, img_size, flags, quiet, &local_err);
556     if (local_err) {
557         error_reportf_err(local_err, "%s: ", filename);
558         goto fail;
559     }
560 
561     g_free(options);
562     return 0;
563 
564 fail:
565     g_free(options);
566     return 1;
567 }
568 
569 static void dump_json_image_check(ImageCheck *check, bool quiet)
570 {
571     QString *str;
572     QObject *obj;
573     Visitor *v = qobject_output_visitor_new(&obj);
574 
575     visit_type_ImageCheck(v, NULL, &check, &error_abort);
576     visit_complete(v, &obj);
577     str = qobject_to_json_pretty(obj);
578     assert(str != NULL);
579     qprintf(quiet, "%s\n", qstring_get_str(str));
580     qobject_unref(obj);
581     visit_free(v);
582     qobject_unref(str);
583 }
584 
585 static void dump_human_image_check(ImageCheck *check, bool quiet)
586 {
587     if (!(check->corruptions || check->leaks || check->check_errors)) {
588         qprintf(quiet, "No errors were found on the image.\n");
589     } else {
590         if (check->corruptions) {
591             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
592                     "Data may be corrupted, or further writes to the image "
593                     "may corrupt it.\n",
594                     check->corruptions);
595         }
596 
597         if (check->leaks) {
598             qprintf(quiet,
599                     "\n%" PRId64 " leaked clusters were found on the image.\n"
600                     "This means waste of disk space, but no harm to data.\n",
601                     check->leaks);
602         }
603 
604         if (check->check_errors) {
605             qprintf(quiet,
606                     "\n%" PRId64
607                     " internal errors have occurred during the check.\n",
608                     check->check_errors);
609         }
610     }
611 
612     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
613         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
614                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
615                 check->allocated_clusters, check->total_clusters,
616                 check->allocated_clusters * 100.0 / check->total_clusters,
617                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
618                 check->compressed_clusters * 100.0 /
619                 check->allocated_clusters);
620     }
621 
622     if (check->image_end_offset) {
623         qprintf(quiet,
624                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
625     }
626 }
627 
628 static int collect_image_check(BlockDriverState *bs,
629                    ImageCheck *check,
630                    const char *filename,
631                    const char *fmt,
632                    int fix)
633 {
634     int ret;
635     BdrvCheckResult result;
636 
637     ret = bdrv_check(bs, &result, fix);
638     if (ret < 0) {
639         return ret;
640     }
641 
642     check->filename                 = g_strdup(filename);
643     check->format                   = g_strdup(bdrv_get_format_name(bs));
644     check->check_errors             = result.check_errors;
645     check->corruptions              = result.corruptions;
646     check->has_corruptions          = result.corruptions != 0;
647     check->leaks                    = result.leaks;
648     check->has_leaks                = result.leaks != 0;
649     check->corruptions_fixed        = result.corruptions_fixed;
650     check->has_corruptions_fixed    = result.corruptions != 0;
651     check->leaks_fixed              = result.leaks_fixed;
652     check->has_leaks_fixed          = result.leaks != 0;
653     check->image_end_offset         = result.image_end_offset;
654     check->has_image_end_offset     = result.image_end_offset != 0;
655     check->total_clusters           = result.bfi.total_clusters;
656     check->has_total_clusters       = result.bfi.total_clusters != 0;
657     check->allocated_clusters       = result.bfi.allocated_clusters;
658     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
659     check->fragmented_clusters      = result.bfi.fragmented_clusters;
660     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
661     check->compressed_clusters      = result.bfi.compressed_clusters;
662     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
663 
664     return 0;
665 }
666 
667 /*
668  * Checks an image for consistency. Exit codes:
669  *
670  *  0 - Check completed, image is good
671  *  1 - Check not completed because of internal errors
672  *  2 - Check completed, image is corrupted
673  *  3 - Check completed, image has leaked clusters, but is good otherwise
674  * 63 - Checks are not supported by the image format
675  */
676 static int img_check(int argc, char **argv)
677 {
678     int c, ret;
679     OutputFormat output_format = OFORMAT_HUMAN;
680     const char *filename, *fmt, *output, *cache;
681     BlockBackend *blk;
682     BlockDriverState *bs;
683     int fix = 0;
684     int flags = BDRV_O_CHECK;
685     bool writethrough;
686     ImageCheck *check;
687     bool quiet = false;
688     bool image_opts = false;
689     bool force_share = false;
690 
691     fmt = NULL;
692     output = NULL;
693     cache = BDRV_DEFAULT_CACHE;
694 
695     for(;;) {
696         int option_index = 0;
697         static const struct option long_options[] = {
698             {"help", no_argument, 0, 'h'},
699             {"format", required_argument, 0, 'f'},
700             {"repair", required_argument, 0, 'r'},
701             {"output", required_argument, 0, OPTION_OUTPUT},
702             {"object", required_argument, 0, OPTION_OBJECT},
703             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
704             {"force-share", no_argument, 0, 'U'},
705             {0, 0, 0, 0}
706         };
707         c = getopt_long(argc, argv, ":hf:r:T:qU",
708                         long_options, &option_index);
709         if (c == -1) {
710             break;
711         }
712         switch(c) {
713         case ':':
714             missing_argument(argv[optind - 1]);
715             break;
716         case '?':
717             unrecognized_option(argv[optind - 1]);
718             break;
719         case 'h':
720             help();
721             break;
722         case 'f':
723             fmt = optarg;
724             break;
725         case 'r':
726             flags |= BDRV_O_RDWR;
727 
728             if (!strcmp(optarg, "leaks")) {
729                 fix = BDRV_FIX_LEAKS;
730             } else if (!strcmp(optarg, "all")) {
731                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
732             } else {
733                 error_exit("Unknown option value for -r "
734                            "(expecting 'leaks' or 'all'): %s", optarg);
735             }
736             break;
737         case OPTION_OUTPUT:
738             output = optarg;
739             break;
740         case 'T':
741             cache = optarg;
742             break;
743         case 'q':
744             quiet = true;
745             break;
746         case 'U':
747             force_share = true;
748             break;
749         case OPTION_OBJECT: {
750             QemuOpts *opts;
751             opts = qemu_opts_parse_noisily(&qemu_object_opts,
752                                            optarg, true);
753             if (!opts) {
754                 return 1;
755             }
756         }   break;
757         case OPTION_IMAGE_OPTS:
758             image_opts = true;
759             break;
760         }
761     }
762     if (optind != argc - 1) {
763         error_exit("Expecting one image file name");
764     }
765     filename = argv[optind++];
766 
767     if (output && !strcmp(output, "json")) {
768         output_format = OFORMAT_JSON;
769     } else if (output && !strcmp(output, "human")) {
770         output_format = OFORMAT_HUMAN;
771     } else if (output) {
772         error_report("--output must be used with human or json as argument.");
773         return 1;
774     }
775 
776     if (qemu_opts_foreach(&qemu_object_opts,
777                           user_creatable_add_opts_foreach,
778                           qemu_img_object_print_help, &error_fatal)) {
779         return 1;
780     }
781 
782     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
783     if (ret < 0) {
784         error_report("Invalid source cache option: %s", cache);
785         return 1;
786     }
787 
788     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
789                    force_share);
790     if (!blk) {
791         return 1;
792     }
793     bs = blk_bs(blk);
794 
795     check = g_new0(ImageCheck, 1);
796     ret = collect_image_check(bs, check, filename, fmt, fix);
797 
798     if (ret == -ENOTSUP) {
799         error_report("This image format does not support checks");
800         ret = 63;
801         goto fail;
802     }
803 
804     if (check->corruptions_fixed || check->leaks_fixed) {
805         int corruptions_fixed, leaks_fixed;
806 
807         leaks_fixed         = check->leaks_fixed;
808         corruptions_fixed   = check->corruptions_fixed;
809 
810         if (output_format == OFORMAT_HUMAN) {
811             qprintf(quiet,
812                     "The following inconsistencies were found and repaired:\n\n"
813                     "    %" PRId64 " leaked clusters\n"
814                     "    %" PRId64 " corruptions\n\n"
815                     "Double checking the fixed image now...\n",
816                     check->leaks_fixed,
817                     check->corruptions_fixed);
818         }
819 
820         ret = collect_image_check(bs, check, filename, fmt, 0);
821 
822         check->leaks_fixed          = leaks_fixed;
823         check->corruptions_fixed    = corruptions_fixed;
824     }
825 
826     if (!ret) {
827         switch (output_format) {
828         case OFORMAT_HUMAN:
829             dump_human_image_check(check, quiet);
830             break;
831         case OFORMAT_JSON:
832             dump_json_image_check(check, quiet);
833             break;
834         }
835     }
836 
837     if (ret || check->check_errors) {
838         if (ret) {
839             error_report("Check failed: %s", strerror(-ret));
840         } else {
841             error_report("Check failed");
842         }
843         ret = 1;
844         goto fail;
845     }
846 
847     if (check->corruptions) {
848         ret = 2;
849     } else if (check->leaks) {
850         ret = 3;
851     } else {
852         ret = 0;
853     }
854 
855 fail:
856     qapi_free_ImageCheck(check);
857     blk_unref(blk);
858     return ret;
859 }
860 
861 typedef struct CommonBlockJobCBInfo {
862     BlockDriverState *bs;
863     Error **errp;
864 } CommonBlockJobCBInfo;
865 
866 static void common_block_job_cb(void *opaque, int ret)
867 {
868     CommonBlockJobCBInfo *cbi = opaque;
869 
870     if (ret < 0) {
871         error_setg_errno(cbi->errp, -ret, "Block job failed");
872     }
873 }
874 
875 static void run_block_job(BlockJob *job, Error **errp)
876 {
877     AioContext *aio_context = blk_get_aio_context(job->blk);
878     int ret = 0;
879 
880     aio_context_acquire(aio_context);
881     job_ref(&job->job);
882     do {
883         float progress = 0.0f;
884         aio_poll(aio_context, true);
885         if (job->job.progress_total) {
886             progress = (float)job->job.progress_current /
887                        job->job.progress_total * 100.f;
888         }
889         qemu_progress_print(progress, 0);
890     } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
891 
892     if (!job_is_completed(&job->job)) {
893         ret = job_complete_sync(&job->job, errp);
894     } else {
895         ret = job->job.ret;
896     }
897     job_unref(&job->job);
898     aio_context_release(aio_context);
899 
900     /* publish completion progress only when success */
901     if (!ret) {
902         qemu_progress_print(100.f, 0);
903     }
904 }
905 
906 static int img_commit(int argc, char **argv)
907 {
908     int c, ret, flags;
909     const char *filename, *fmt, *cache, *base;
910     BlockBackend *blk;
911     BlockDriverState *bs, *base_bs;
912     BlockJob *job;
913     bool progress = false, quiet = false, drop = false;
914     bool writethrough;
915     Error *local_err = NULL;
916     CommonBlockJobCBInfo cbi;
917     bool image_opts = false;
918     AioContext *aio_context;
919 
920     fmt = NULL;
921     cache = BDRV_DEFAULT_CACHE;
922     base = NULL;
923     for(;;) {
924         static const struct option long_options[] = {
925             {"help", no_argument, 0, 'h'},
926             {"object", required_argument, 0, OPTION_OBJECT},
927             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
928             {0, 0, 0, 0}
929         };
930         c = getopt_long(argc, argv, ":f:ht:b:dpq",
931                         long_options, NULL);
932         if (c == -1) {
933             break;
934         }
935         switch(c) {
936         case ':':
937             missing_argument(argv[optind - 1]);
938             break;
939         case '?':
940             unrecognized_option(argv[optind - 1]);
941             break;
942         case 'h':
943             help();
944             break;
945         case 'f':
946             fmt = optarg;
947             break;
948         case 't':
949             cache = optarg;
950             break;
951         case 'b':
952             base = optarg;
953             /* -b implies -d */
954             drop = true;
955             break;
956         case 'd':
957             drop = true;
958             break;
959         case 'p':
960             progress = true;
961             break;
962         case 'q':
963             quiet = true;
964             break;
965         case OPTION_OBJECT: {
966             QemuOpts *opts;
967             opts = qemu_opts_parse_noisily(&qemu_object_opts,
968                                            optarg, true);
969             if (!opts) {
970                 return 1;
971             }
972         }   break;
973         case OPTION_IMAGE_OPTS:
974             image_opts = true;
975             break;
976         }
977     }
978 
979     /* Progress is not shown in Quiet mode */
980     if (quiet) {
981         progress = false;
982     }
983 
984     if (optind != argc - 1) {
985         error_exit("Expecting one image file name");
986     }
987     filename = argv[optind++];
988 
989     if (qemu_opts_foreach(&qemu_object_opts,
990                           user_creatable_add_opts_foreach,
991                           qemu_img_object_print_help, &error_fatal)) {
992         return 1;
993     }
994 
995     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
996     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
997     if (ret < 0) {
998         error_report("Invalid cache option: %s", cache);
999         return 1;
1000     }
1001 
1002     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1003                    false);
1004     if (!blk) {
1005         return 1;
1006     }
1007     bs = blk_bs(blk);
1008 
1009     qemu_progress_init(progress, 1.f);
1010     qemu_progress_print(0.f, 100);
1011 
1012     if (base) {
1013         base_bs = bdrv_find_backing_image(bs, base);
1014         if (!base_bs) {
1015             error_setg(&local_err,
1016                        "Did not find '%s' in the backing chain of '%s'",
1017                        base, filename);
1018             goto done;
1019         }
1020     } else {
1021         /* This is different from QMP, which by default uses the deepest file in
1022          * the backing chain (i.e., the very base); however, the traditional
1023          * behavior of qemu-img commit is using the immediate backing file. */
1024         base_bs = backing_bs(bs);
1025         if (!base_bs) {
1026             error_setg(&local_err, "Image does not have a backing file");
1027             goto done;
1028         }
1029     }
1030 
1031     cbi = (CommonBlockJobCBInfo){
1032         .errp = &local_err,
1033         .bs   = bs,
1034     };
1035 
1036     aio_context = bdrv_get_aio_context(bs);
1037     aio_context_acquire(aio_context);
1038     commit_active_start("commit", bs, base_bs, JOB_DEFAULT, 0,
1039                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1040                         &cbi, false, &local_err);
1041     aio_context_release(aio_context);
1042     if (local_err) {
1043         goto done;
1044     }
1045 
1046     /* When the block job completes, the BlockBackend reference will point to
1047      * the old backing file. In order to avoid that the top image is already
1048      * deleted, so we can still empty it afterwards, increment the reference
1049      * counter here preemptively. */
1050     if (!drop) {
1051         bdrv_ref(bs);
1052     }
1053 
1054     job = block_job_get("commit");
1055     assert(job);
1056     run_block_job(job, &local_err);
1057     if (local_err) {
1058         goto unref_backing;
1059     }
1060 
1061     if (!drop && bs->drv->bdrv_make_empty) {
1062         ret = bs->drv->bdrv_make_empty(bs);
1063         if (ret) {
1064             error_setg_errno(&local_err, -ret, "Could not empty %s",
1065                              filename);
1066             goto unref_backing;
1067         }
1068     }
1069 
1070 unref_backing:
1071     if (!drop) {
1072         bdrv_unref(bs);
1073     }
1074 
1075 done:
1076     qemu_progress_end();
1077 
1078     blk_unref(blk);
1079 
1080     if (local_err) {
1081         error_report_err(local_err);
1082         return 1;
1083     }
1084 
1085     qprintf(quiet, "Image committed.\n");
1086     return 0;
1087 }
1088 
1089 /*
1090  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1091  * of the first sector boundary within buf where the sector contains a
1092  * non-zero byte.  This function is robust to a buffer that is not
1093  * sector-aligned.
1094  */
1095 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1096 {
1097     int64_t i;
1098     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1099 
1100     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1101         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1102             return i;
1103         }
1104     }
1105     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1106         return i;
1107     }
1108     return -1;
1109 }
1110 
1111 /*
1112  * Returns true iff the first sector pointed to by 'buf' contains at least
1113  * a non-NUL byte.
1114  *
1115  * 'pnum' is set to the number of sectors (including and immediately following
1116  * the first one) that are known to be in the same allocated/unallocated state.
1117  * The function will try to align the end offset to alignment boundaries so
1118  * that the request will at least end aligned and consequtive requests will
1119  * also start at an aligned offset.
1120  */
1121 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1122                                 int64_t sector_num, int alignment)
1123 {
1124     bool is_zero;
1125     int i, tail;
1126 
1127     if (n <= 0) {
1128         *pnum = 0;
1129         return 0;
1130     }
1131     is_zero = buffer_is_zero(buf, 512);
1132     for(i = 1; i < n; i++) {
1133         buf += 512;
1134         if (is_zero != buffer_is_zero(buf, 512)) {
1135             break;
1136         }
1137     }
1138 
1139     tail = (sector_num + i) & (alignment - 1);
1140     if (tail) {
1141         if (is_zero && i <= tail) {
1142             /* treat unallocated areas which only consist
1143              * of a small tail as allocated. */
1144             is_zero = false;
1145         }
1146         if (!is_zero) {
1147             /* align up end offset of allocated areas. */
1148             i += alignment - tail;
1149             i = MIN(i, n);
1150         } else {
1151             /* align down end offset of zero areas. */
1152             i -= tail;
1153         }
1154     }
1155     *pnum = i;
1156     return !is_zero;
1157 }
1158 
1159 /*
1160  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1161  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1162  * breaking up write requests for only small sparse areas.
1163  */
1164 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1165     int min, int64_t sector_num, int alignment)
1166 {
1167     int ret;
1168     int num_checked, num_used;
1169 
1170     if (n < min) {
1171         min = n;
1172     }
1173 
1174     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1175     if (!ret) {
1176         return ret;
1177     }
1178 
1179     num_used = *pnum;
1180     buf += BDRV_SECTOR_SIZE * *pnum;
1181     n -= *pnum;
1182     sector_num += *pnum;
1183     num_checked = num_used;
1184 
1185     while (n > 0) {
1186         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1187 
1188         buf += BDRV_SECTOR_SIZE * *pnum;
1189         n -= *pnum;
1190         sector_num += *pnum;
1191         num_checked += *pnum;
1192         if (ret) {
1193             num_used = num_checked;
1194         } else if (*pnum >= min) {
1195             break;
1196         }
1197     }
1198 
1199     *pnum = num_used;
1200     return 1;
1201 }
1202 
1203 /*
1204  * Compares two buffers sector by sector. Returns 0 if the first
1205  * sector of each buffer matches, non-zero otherwise.
1206  *
1207  * pnum is set to the sector-aligned size of the buffer prefix that
1208  * has the same matching status as the first sector.
1209  */
1210 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1211                            int64_t bytes, int64_t *pnum)
1212 {
1213     bool res;
1214     int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
1215 
1216     assert(bytes > 0);
1217 
1218     res = !!memcmp(buf1, buf2, i);
1219     while (i < bytes) {
1220         int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
1221 
1222         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1223             break;
1224         }
1225         i += len;
1226     }
1227 
1228     *pnum = i;
1229     return res;
1230 }
1231 
1232 #define IO_BUF_SIZE (2 * MiB)
1233 
1234 /*
1235  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1236  *
1237  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1238  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1239  * failure), and 4 on error (the exit status for read errors), after emitting
1240  * an error message.
1241  *
1242  * @param blk:  BlockBackend for the image
1243  * @param offset: Starting offset to check
1244  * @param bytes: Number of bytes to check
1245  * @param filename: Name of disk file we are checking (logging purpose)
1246  * @param buffer: Allocated buffer for storing read data
1247  * @param quiet: Flag for quiet mode
1248  */
1249 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1250                                int64_t bytes, const char *filename,
1251                                uint8_t *buffer, bool quiet)
1252 {
1253     int ret = 0;
1254     int64_t idx;
1255 
1256     ret = blk_pread(blk, offset, buffer, bytes);
1257     if (ret < 0) {
1258         error_report("Error while reading offset %" PRId64 " of %s: %s",
1259                      offset, filename, strerror(-ret));
1260         return 4;
1261     }
1262     idx = find_nonzero(buffer, bytes);
1263     if (idx >= 0) {
1264         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1265                 offset + idx);
1266         return 1;
1267     }
1268 
1269     return 0;
1270 }
1271 
1272 /*
1273  * Compares two images. Exit codes:
1274  *
1275  * 0 - Images are identical
1276  * 1 - Images differ
1277  * >1 - Error occurred
1278  */
1279 static int img_compare(int argc, char **argv)
1280 {
1281     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1282     BlockBackend *blk1, *blk2;
1283     BlockDriverState *bs1, *bs2;
1284     int64_t total_size1, total_size2;
1285     uint8_t *buf1 = NULL, *buf2 = NULL;
1286     int64_t pnum1, pnum2;
1287     int allocated1, allocated2;
1288     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1289     bool progress = false, quiet = false, strict = false;
1290     int flags;
1291     bool writethrough;
1292     int64_t total_size;
1293     int64_t offset = 0;
1294     int64_t chunk;
1295     int c;
1296     uint64_t progress_base;
1297     bool image_opts = false;
1298     bool force_share = false;
1299 
1300     cache = BDRV_DEFAULT_CACHE;
1301     for (;;) {
1302         static const struct option long_options[] = {
1303             {"help", no_argument, 0, 'h'},
1304             {"object", required_argument, 0, OPTION_OBJECT},
1305             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1306             {"force-share", no_argument, 0, 'U'},
1307             {0, 0, 0, 0}
1308         };
1309         c = getopt_long(argc, argv, ":hf:F:T:pqsU",
1310                         long_options, NULL);
1311         if (c == -1) {
1312             break;
1313         }
1314         switch (c) {
1315         case ':':
1316             missing_argument(argv[optind - 1]);
1317             break;
1318         case '?':
1319             unrecognized_option(argv[optind - 1]);
1320             break;
1321         case 'h':
1322             help();
1323             break;
1324         case 'f':
1325             fmt1 = optarg;
1326             break;
1327         case 'F':
1328             fmt2 = optarg;
1329             break;
1330         case 'T':
1331             cache = optarg;
1332             break;
1333         case 'p':
1334             progress = true;
1335             break;
1336         case 'q':
1337             quiet = true;
1338             break;
1339         case 's':
1340             strict = true;
1341             break;
1342         case 'U':
1343             force_share = true;
1344             break;
1345         case OPTION_OBJECT: {
1346             QemuOpts *opts;
1347             opts = qemu_opts_parse_noisily(&qemu_object_opts,
1348                                            optarg, true);
1349             if (!opts) {
1350                 ret = 2;
1351                 goto out4;
1352             }
1353         }   break;
1354         case OPTION_IMAGE_OPTS:
1355             image_opts = true;
1356             break;
1357         }
1358     }
1359 
1360     /* Progress is not shown in Quiet mode */
1361     if (quiet) {
1362         progress = false;
1363     }
1364 
1365 
1366     if (optind != argc - 2) {
1367         error_exit("Expecting two image file names");
1368     }
1369     filename1 = argv[optind++];
1370     filename2 = argv[optind++];
1371 
1372     if (qemu_opts_foreach(&qemu_object_opts,
1373                           user_creatable_add_opts_foreach,
1374                           qemu_img_object_print_help, &error_fatal)) {
1375         ret = 2;
1376         goto out4;
1377     }
1378 
1379     /* Initialize before goto out */
1380     qemu_progress_init(progress, 2.0);
1381 
1382     flags = 0;
1383     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1384     if (ret < 0) {
1385         error_report("Invalid source cache option: %s", cache);
1386         ret = 2;
1387         goto out3;
1388     }
1389 
1390     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
1391                     force_share);
1392     if (!blk1) {
1393         ret = 2;
1394         goto out3;
1395     }
1396 
1397     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
1398                     force_share);
1399     if (!blk2) {
1400         ret = 2;
1401         goto out2;
1402     }
1403     bs1 = blk_bs(blk1);
1404     bs2 = blk_bs(blk2);
1405 
1406     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1407     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1408     total_size1 = blk_getlength(blk1);
1409     if (total_size1 < 0) {
1410         error_report("Can't get size of %s: %s",
1411                      filename1, strerror(-total_size1));
1412         ret = 4;
1413         goto out;
1414     }
1415     total_size2 = blk_getlength(blk2);
1416     if (total_size2 < 0) {
1417         error_report("Can't get size of %s: %s",
1418                      filename2, strerror(-total_size2));
1419         ret = 4;
1420         goto out;
1421     }
1422     total_size = MIN(total_size1, total_size2);
1423     progress_base = MAX(total_size1, total_size2);
1424 
1425     qemu_progress_print(0, 100);
1426 
1427     if (strict && total_size1 != total_size2) {
1428         ret = 1;
1429         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1430         goto out;
1431     }
1432 
1433     while (offset < total_size) {
1434         int status1, status2;
1435 
1436         status1 = bdrv_block_status_above(bs1, NULL, offset,
1437                                           total_size1 - offset, &pnum1, NULL,
1438                                           NULL);
1439         if (status1 < 0) {
1440             ret = 3;
1441             error_report("Sector allocation test failed for %s", filename1);
1442             goto out;
1443         }
1444         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1445 
1446         status2 = bdrv_block_status_above(bs2, NULL, offset,
1447                                           total_size2 - offset, &pnum2, NULL,
1448                                           NULL);
1449         if (status2 < 0) {
1450             ret = 3;
1451             error_report("Sector allocation test failed for %s", filename2);
1452             goto out;
1453         }
1454         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1455 
1456         assert(pnum1 && pnum2);
1457         chunk = MIN(pnum1, pnum2);
1458 
1459         if (strict) {
1460             if (status1 != status2) {
1461                 ret = 1;
1462                 qprintf(quiet, "Strict mode: Offset %" PRId64
1463                         " block status mismatch!\n", offset);
1464                 goto out;
1465             }
1466         }
1467         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1468             /* nothing to do */
1469         } else if (allocated1 == allocated2) {
1470             if (allocated1) {
1471                 int64_t pnum;
1472 
1473                 chunk = MIN(chunk, IO_BUF_SIZE);
1474                 ret = blk_pread(blk1, offset, buf1, chunk);
1475                 if (ret < 0) {
1476                     error_report("Error while reading offset %" PRId64
1477                                  " of %s: %s",
1478                                  offset, filename1, strerror(-ret));
1479                     ret = 4;
1480                     goto out;
1481                 }
1482                 ret = blk_pread(blk2, offset, buf2, chunk);
1483                 if (ret < 0) {
1484                     error_report("Error while reading offset %" PRId64
1485                                  " of %s: %s",
1486                                  offset, filename2, strerror(-ret));
1487                     ret = 4;
1488                     goto out;
1489                 }
1490                 ret = compare_buffers(buf1, buf2, chunk, &pnum);
1491                 if (ret || pnum != chunk) {
1492                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1493                             offset + (ret ? 0 : pnum));
1494                     ret = 1;
1495                     goto out;
1496                 }
1497             }
1498         } else {
1499             chunk = MIN(chunk, IO_BUF_SIZE);
1500             if (allocated1) {
1501                 ret = check_empty_sectors(blk1, offset, chunk,
1502                                           filename1, buf1, quiet);
1503             } else {
1504                 ret = check_empty_sectors(blk2, offset, chunk,
1505                                           filename2, buf1, quiet);
1506             }
1507             if (ret) {
1508                 goto out;
1509             }
1510         }
1511         offset += chunk;
1512         qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1513     }
1514 
1515     if (total_size1 != total_size2) {
1516         BlockBackend *blk_over;
1517         const char *filename_over;
1518 
1519         qprintf(quiet, "Warning: Image size mismatch!\n");
1520         if (total_size1 > total_size2) {
1521             blk_over = blk1;
1522             filename_over = filename1;
1523         } else {
1524             blk_over = blk2;
1525             filename_over = filename2;
1526         }
1527 
1528         while (offset < progress_base) {
1529             ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
1530                                           progress_base - offset, &chunk,
1531                                           NULL, NULL);
1532             if (ret < 0) {
1533                 ret = 3;
1534                 error_report("Sector allocation test failed for %s",
1535                              filename_over);
1536                 goto out;
1537 
1538             }
1539             if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
1540                 chunk = MIN(chunk, IO_BUF_SIZE);
1541                 ret = check_empty_sectors(blk_over, offset, chunk,
1542                                           filename_over, buf1, quiet);
1543                 if (ret) {
1544                     goto out;
1545                 }
1546             }
1547             offset += chunk;
1548             qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1549         }
1550     }
1551 
1552     qprintf(quiet, "Images are identical.\n");
1553     ret = 0;
1554 
1555 out:
1556     qemu_vfree(buf1);
1557     qemu_vfree(buf2);
1558     blk_unref(blk2);
1559 out2:
1560     blk_unref(blk1);
1561 out3:
1562     qemu_progress_end();
1563 out4:
1564     return ret;
1565 }
1566 
/* How 'qemu-img convert' has to treat a range of source sectors */
enum ImgConvertBlockStatus {
    /* Contains (or may contain) data that has to be copied to the target */
    BLK_DATA,
    /* Reads as zeroes */
    BLK_ZERO,
    /* Left unallocated in the target so that its backing file shows through */
    BLK_BACKING_FILE,
};
1572 
1573 #define MAX_COROUTINES 16
1574 
1575 typedef struct ImgConvertState {
1576     BlockBackend **src;
1577     int64_t *src_sectors;
1578     int src_num;
1579     int64_t total_sectors;
1580     int64_t allocated_sectors;
1581     int64_t allocated_done;
1582     int64_t sector_num;
1583     int64_t wr_offs;
1584     enum ImgConvertBlockStatus status;
1585     int64_t sector_next_status;
1586     BlockBackend *target;
1587     bool has_zero_init;
1588     bool compressed;
1589     bool unallocated_blocks_are_zero;
1590     bool target_is_new;
1591     bool target_has_backing;
1592     int64_t target_backing_sectors; /* negative if unknown */
1593     bool wr_in_order;
1594     bool copy_range;
1595     bool salvage;
1596     bool quiet;
1597     int min_sparse;
1598     int alignment;
1599     size_t cluster_sectors;
1600     size_t buf_sectors;
1601     long num_coroutines;
1602     int running_coroutines;
1603     Coroutine *co[MAX_COROUTINES];
1604     int64_t wait_sector_num[MAX_COROUTINES];
1605     CoMutex lock;
1606     int ret;
1607 } ImgConvertState;
1608 
1609 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1610                                 int *src_cur, int64_t *src_cur_offset)
1611 {
1612     *src_cur = 0;
1613     *src_cur_offset = 0;
1614     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1615         *src_cur_offset += s->src_sectors[*src_cur];
1616         (*src_cur)++;
1617         assert(*src_cur < s->src_num);
1618     }
1619 }
1620 
1621 static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1622 {
1623     int64_t src_cur_offset;
1624     int ret, n, src_cur;
1625     bool post_backing_zero = false;
1626 
1627     convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1628 
1629     assert(s->total_sectors > sector_num);
1630     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1631 
1632     if (s->target_backing_sectors >= 0) {
1633         if (sector_num >= s->target_backing_sectors) {
1634             post_backing_zero = s->unallocated_blocks_are_zero;
1635         } else if (sector_num + n > s->target_backing_sectors) {
1636             /* Split requests around target_backing_sectors (because
1637              * starting from there, zeros are handled differently) */
1638             n = s->target_backing_sectors - sector_num;
1639         }
1640     }
1641 
1642     if (s->sector_next_status <= sector_num) {
1643         uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
1644         int64_t count;
1645 
1646         do {
1647             count = n * BDRV_SECTOR_SIZE;
1648 
1649             if (s->target_has_backing) {
1650                 ret = bdrv_block_status(blk_bs(s->src[src_cur]), offset,
1651                                         count, &count, NULL, NULL);
1652             } else {
1653                 ret = bdrv_block_status_above(blk_bs(s->src[src_cur]), NULL,
1654                                               offset, count, &count, NULL,
1655                                               NULL);
1656             }
1657 
1658             if (ret < 0) {
1659                 if (s->salvage) {
1660                     if (n == 1) {
1661                         if (!s->quiet) {
1662                             warn_report("error while reading block status at "
1663                                         "offset %" PRIu64 ": %s", offset,
1664                                         strerror(-ret));
1665                         }
1666                         /* Just try to read the data, then */
1667                         ret = BDRV_BLOCK_DATA;
1668                         count = BDRV_SECTOR_SIZE;
1669                     } else {
1670                         /* Retry on a shorter range */
1671                         n = DIV_ROUND_UP(n, 4);
1672                     }
1673                 } else {
1674                     error_report("error while reading block status at offset "
1675                                  "%" PRIu64 ": %s", offset, strerror(-ret));
1676                     return ret;
1677                 }
1678             }
1679         } while (ret < 0);
1680 
1681         n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
1682 
1683         if (ret & BDRV_BLOCK_ZERO) {
1684             s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
1685         } else if (ret & BDRV_BLOCK_DATA) {
1686             s->status = BLK_DATA;
1687         } else {
1688             s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
1689         }
1690 
1691         s->sector_next_status = sector_num + n;
1692     }
1693 
1694     n = MIN(n, s->sector_next_status - sector_num);
1695     if (s->status == BLK_DATA) {
1696         n = MIN(n, s->buf_sectors);
1697     }
1698 
1699     /* We need to write complete clusters for compressed images, so if an
1700      * unallocated area is shorter than that, we must consider the whole
1701      * cluster allocated. */
1702     if (s->compressed) {
1703         if (n < s->cluster_sectors) {
1704             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1705             s->status = BLK_DATA;
1706         } else {
1707             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1708         }
1709     }
1710 
1711     return n;
1712 }
1713 
1714 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1715                                         int nb_sectors, uint8_t *buf)
1716 {
1717     uint64_t single_read_until = 0;
1718     int n, ret;
1719 
1720     assert(nb_sectors <= s->buf_sectors);
1721     while (nb_sectors > 0) {
1722         BlockBackend *blk;
1723         int src_cur;
1724         int64_t bs_sectors, src_cur_offset;
1725         uint64_t offset;
1726 
1727         /* In the case of compression with multiple source files, we can get a
1728          * nb_sectors that spreads into the next part. So we must be able to
1729          * read across multiple BDSes for one convert_read() call. */
1730         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1731         blk = s->src[src_cur];
1732         bs_sectors = s->src_sectors[src_cur];
1733 
1734         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1735 
1736         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1737         if (single_read_until > offset) {
1738             n = 1;
1739         }
1740 
1741         ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1742         if (ret < 0) {
1743             if (s->salvage) {
1744                 if (n > 1) {
1745                     single_read_until = offset + (n << BDRV_SECTOR_BITS);
1746                     continue;
1747                 } else {
1748                     if (!s->quiet) {
1749                         warn_report("error while reading offset %" PRIu64
1750                                     ": %s", offset, strerror(-ret));
1751                     }
1752                     memset(buf, 0, BDRV_SECTOR_SIZE);
1753                 }
1754             } else {
1755                 return ret;
1756             }
1757         }
1758 
1759         sector_num += n;
1760         nb_sectors -= n;
1761         buf += n * BDRV_SECTOR_SIZE;
1762     }
1763 
1764     return 0;
1765 }
1766 
1767 
1768 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1769                                          int nb_sectors, uint8_t *buf,
1770                                          enum ImgConvertBlockStatus status)
1771 {
1772     int ret;
1773 
1774     while (nb_sectors > 0) {
1775         int n = nb_sectors;
1776         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1777 
1778         switch (status) {
1779         case BLK_BACKING_FILE:
1780             /* If we have a backing file, leave clusters unallocated that are
1781              * unallocated in the source image, so that the backing file is
1782              * visible at the respective offset. */
1783             assert(s->target_has_backing);
1784             break;
1785 
1786         case BLK_DATA:
1787             /* If we're told to keep the target fully allocated (-S 0) or there
1788              * is real non-zero data, we must write it. Otherwise we can treat
1789              * it as zero sectors.
1790              * Compressed clusters need to be written as a whole, so in that
1791              * case we can only save the write if the buffer is completely
1792              * zeroed. */
1793             if (!s->min_sparse ||
1794                 (!s->compressed &&
1795                  is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1796                                           sector_num, s->alignment)) ||
1797                 (s->compressed &&
1798                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1799             {
1800                 ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1801                                     n << BDRV_SECTOR_BITS, buf, flags);
1802                 if (ret < 0) {
1803                     return ret;
1804                 }
1805                 break;
1806             }
1807             /* fall-through */
1808 
1809         case BLK_ZERO:
1810             if (s->has_zero_init) {
1811                 assert(!s->target_has_backing);
1812                 break;
1813             }
1814             ret = blk_co_pwrite_zeroes(s->target,
1815                                        sector_num << BDRV_SECTOR_BITS,
1816                                        n << BDRV_SECTOR_BITS,
1817                                        BDRV_REQ_MAY_UNMAP);
1818             if (ret < 0) {
1819                 return ret;
1820             }
1821             break;
1822         }
1823 
1824         sector_num += n;
1825         nb_sectors -= n;
1826         buf += n * BDRV_SECTOR_SIZE;
1827     }
1828 
1829     return 0;
1830 }
1831 
1832 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1833                                               int nb_sectors)
1834 {
1835     int n, ret;
1836 
1837     while (nb_sectors > 0) {
1838         BlockBackend *blk;
1839         int src_cur;
1840         int64_t bs_sectors, src_cur_offset;
1841         int64_t offset;
1842 
1843         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1844         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1845         blk = s->src[src_cur];
1846         bs_sectors = s->src_sectors[src_cur];
1847 
1848         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1849 
1850         ret = blk_co_copy_range(blk, offset, s->target,
1851                                 sector_num << BDRV_SECTOR_BITS,
1852                                 n << BDRV_SECTOR_BITS, 0, 0);
1853         if (ret < 0) {
1854             return ret;
1855         }
1856 
1857         sector_num += n;
1858         nb_sectors -= n;
1859     }
1860     return 0;
1861 }
1862 
1863 static void coroutine_fn convert_co_do_copy(void *opaque)
1864 {
1865     ImgConvertState *s = opaque;
1866     uint8_t *buf = NULL;
1867     int ret, i;
1868     int index = -1;
1869 
1870     for (i = 0; i < s->num_coroutines; i++) {
1871         if (s->co[i] == qemu_coroutine_self()) {
1872             index = i;
1873             break;
1874         }
1875     }
1876     assert(index >= 0);
1877 
1878     s->running_coroutines++;
1879     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1880 
1881     while (1) {
1882         int n;
1883         int64_t sector_num;
1884         enum ImgConvertBlockStatus status;
1885         bool copy_range;
1886 
1887         qemu_co_mutex_lock(&s->lock);
1888         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1889             qemu_co_mutex_unlock(&s->lock);
1890             break;
1891         }
1892         n = convert_iteration_sectors(s, s->sector_num);
1893         if (n < 0) {
1894             qemu_co_mutex_unlock(&s->lock);
1895             s->ret = n;
1896             break;
1897         }
1898         /* save current sector and allocation status to local variables */
1899         sector_num = s->sector_num;
1900         status = s->status;
1901         if (!s->min_sparse && s->status == BLK_ZERO) {
1902             n = MIN(n, s->buf_sectors);
1903         }
1904         /* increment global sector counter so that other coroutines can
1905          * already continue reading beyond this request */
1906         s->sector_num += n;
1907         qemu_co_mutex_unlock(&s->lock);
1908 
1909         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
1910             s->allocated_done += n;
1911             qemu_progress_print(100.0 * s->allocated_done /
1912                                         s->allocated_sectors, 0);
1913         }
1914 
1915 retry:
1916         copy_range = s->copy_range && s->status == BLK_DATA;
1917         if (status == BLK_DATA && !copy_range) {
1918             ret = convert_co_read(s, sector_num, n, buf);
1919             if (ret < 0) {
1920                 error_report("error while reading sector %" PRId64
1921                              ": %s", sector_num, strerror(-ret));
1922                 s->ret = ret;
1923             }
1924         } else if (!s->min_sparse && status == BLK_ZERO) {
1925             status = BLK_DATA;
1926             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
1927         }
1928 
1929         if (s->wr_in_order) {
1930             /* keep writes in order */
1931             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
1932                 s->wait_sector_num[index] = sector_num;
1933                 qemu_coroutine_yield();
1934             }
1935             s->wait_sector_num[index] = -1;
1936         }
1937 
1938         if (s->ret == -EINPROGRESS) {
1939             if (copy_range) {
1940                 ret = convert_co_copy_range(s, sector_num, n);
1941                 if (ret) {
1942                     s->copy_range = false;
1943                     goto retry;
1944                 }
1945             } else {
1946                 ret = convert_co_write(s, sector_num, n, buf, status);
1947             }
1948             if (ret < 0) {
1949                 error_report("error while writing sector %" PRId64
1950                              ": %s", sector_num, strerror(-ret));
1951                 s->ret = ret;
1952             }
1953         }
1954 
1955         if (s->wr_in_order) {
1956             /* reenter the coroutine that might have waited
1957              * for this write to complete */
1958             s->wr_offs = sector_num + n;
1959             for (i = 0; i < s->num_coroutines; i++) {
1960                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
1961                     /*
1962                      * A -> B -> A cannot occur because A has
1963                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
1964                      * B will never enter A during this time window.
1965                      */
1966                     qemu_coroutine_enter(s->co[i]);
1967                     break;
1968                 }
1969             }
1970         }
1971     }
1972 
1973     qemu_vfree(buf);
1974     s->co[index] = NULL;
1975     s->running_coroutines--;
1976     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
1977         /* the convert job finished successfully */
1978         s->ret = 0;
1979     }
1980 }
1981 
1982 static int convert_do_copy(ImgConvertState *s)
1983 {
1984     int ret, i, n;
1985     int64_t sector_num = 0;
1986 
1987     /* Check whether we have zero initialisation or can get it efficiently */
1988     if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
1989         !s->target_has_backing) {
1990         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
1991     }
1992 
1993     if (!s->has_zero_init && !s->target_has_backing &&
1994         bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
1995     {
1996         ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
1997         if (ret == 0) {
1998             s->has_zero_init = true;
1999         }
2000     }
2001 
2002     /* Allocate buffer for copied data. For compressed images, only one cluster
2003      * can be copied at a time. */
2004     if (s->compressed) {
2005         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2006             error_report("invalid cluster size");
2007             return -EINVAL;
2008         }
2009         s->buf_sectors = s->cluster_sectors;
2010     }
2011 
2012     while (sector_num < s->total_sectors) {
2013         n = convert_iteration_sectors(s, sector_num);
2014         if (n < 0) {
2015             return n;
2016         }
2017         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2018         {
2019             s->allocated_sectors += n;
2020         }
2021         sector_num += n;
2022     }
2023 
2024     /* Do the copy */
2025     s->sector_next_status = 0;
2026     s->ret = -EINPROGRESS;
2027 
2028     qemu_co_mutex_init(&s->lock);
2029     for (i = 0; i < s->num_coroutines; i++) {
2030         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2031         s->wait_sector_num[i] = -1;
2032         qemu_coroutine_enter(s->co[i]);
2033     }
2034 
2035     while (s->running_coroutines) {
2036         main_loop_wait(false);
2037     }
2038 
2039     if (s->compressed && !s->ret) {
2040         /* signal EOF to align */
2041         ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
2042         if (ret < 0) {
2043             return ret;
2044         }
2045     }
2046 
2047     return s->ret;
2048 }
2049 
2050 #define MAX_BUF_SECTORS 32768
2051 
2052 static int img_convert(int argc, char **argv)
2053 {
2054     int c, bs_i, flags, src_flags = 0;
2055     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2056                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2057                *out_filename, *out_baseimg_param, *snapshot_name = NULL;
2058     BlockDriver *drv = NULL, *proto_drv = NULL;
2059     BlockDriverInfo bdi;
2060     BlockDriverState *out_bs;
2061     QemuOpts *opts = NULL, *sn_opts = NULL;
2062     QemuOptsList *create_opts = NULL;
2063     QDict *open_opts = NULL;
2064     char *options = NULL;
2065     Error *local_err = NULL;
2066     bool writethrough, src_writethrough, image_opts = false,
2067          skip_create = false, progress = false, tgt_image_opts = false;
2068     int64_t ret = -EINVAL;
2069     bool force_share = false;
2070     bool explict_min_sparse = false;
2071 
2072     ImgConvertState s = (ImgConvertState) {
2073         /* Need at least 4k of zeros for sparse detection */
2074         .min_sparse         = 8,
2075         .copy_range         = false,
2076         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2077         .wr_in_order        = true,
2078         .num_coroutines     = 8,
2079     };
2080 
2081     for(;;) {
2082         static const struct option long_options[] = {
2083             {"help", no_argument, 0, 'h'},
2084             {"object", required_argument, 0, OPTION_OBJECT},
2085             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2086             {"force-share", no_argument, 0, 'U'},
2087             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2088             {"salvage", no_argument, 0, OPTION_SALVAGE},
2089             {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2090             {0, 0, 0, 0}
2091         };
2092         c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU",
2093                         long_options, NULL);
2094         if (c == -1) {
2095             break;
2096         }
2097         switch(c) {
2098         case ':':
2099             missing_argument(argv[optind - 1]);
2100             break;
2101         case '?':
2102             unrecognized_option(argv[optind - 1]);
2103             break;
2104         case 'h':
2105             help();
2106             break;
2107         case 'f':
2108             fmt = optarg;
2109             break;
2110         case 'O':
2111             out_fmt = optarg;
2112             break;
2113         case 'B':
2114             out_baseimg = optarg;
2115             break;
2116         case 'C':
2117             s.copy_range = true;
2118             break;
2119         case 'c':
2120             s.compressed = true;
2121             break;
2122         case 'o':
2123             if (!is_valid_option_list(optarg)) {
2124                 error_report("Invalid option list: %s", optarg);
2125                 goto fail_getopt;
2126             }
2127             if (!options) {
2128                 options = g_strdup(optarg);
2129             } else {
2130                 char *old_options = options;
2131                 options = g_strdup_printf("%s,%s", options, optarg);
2132                 g_free(old_options);
2133             }
2134             break;
2135         case 'l':
2136             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2137                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2138                                                   optarg, false);
2139                 if (!sn_opts) {
2140                     error_report("Failed in parsing snapshot param '%s'",
2141                                  optarg);
2142                     goto fail_getopt;
2143                 }
2144             } else {
2145                 snapshot_name = optarg;
2146             }
2147             break;
2148         case 'S':
2149         {
2150             int64_t sval;
2151 
2152             sval = cvtnum(optarg);
2153             if (sval < 0 || !QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2154                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2155                 error_report("Invalid buffer size for sparse output specified. "
2156                     "Valid sizes are multiples of %llu up to %llu. Select "
2157                     "0 to disable sparse detection (fully allocates output).",
2158                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2159                 goto fail_getopt;
2160             }
2161 
2162             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2163             explict_min_sparse = true;
2164             break;
2165         }
2166         case 'p':
2167             progress = true;
2168             break;
2169         case 't':
2170             cache = optarg;
2171             break;
2172         case 'T':
2173             src_cache = optarg;
2174             break;
2175         case 'q':
2176             s.quiet = true;
2177             break;
2178         case 'n':
2179             skip_create = true;
2180             break;
2181         case 'm':
2182             if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2183                 s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2184                 error_report("Invalid number of coroutines. Allowed number of"
2185                              " coroutines is between 1 and %d", MAX_COROUTINES);
2186                 goto fail_getopt;
2187             }
2188             break;
2189         case 'W':
2190             s.wr_in_order = false;
2191             break;
2192         case 'U':
2193             force_share = true;
2194             break;
2195         case OPTION_OBJECT: {
2196             QemuOpts *object_opts;
2197             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
2198                                                   optarg, true);
2199             if (!object_opts) {
2200                 goto fail_getopt;
2201             }
2202             break;
2203         }
2204         case OPTION_IMAGE_OPTS:
2205             image_opts = true;
2206             break;
2207         case OPTION_SALVAGE:
2208             s.salvage = true;
2209             break;
2210         case OPTION_TARGET_IMAGE_OPTS:
2211             tgt_image_opts = true;
2212             break;
2213         case OPTION_TARGET_IS_ZERO:
2214             /*
2215              * The user asserting that the target is blank has the
2216              * same effect as the target driver supporting zero
2217              * initialisation.
2218              */
2219             s.has_zero_init = true;
2220             break;
2221         }
2222     }
2223 
2224     if (!out_fmt && !tgt_image_opts) {
2225         out_fmt = "raw";
2226     }
2227 
2228     if (qemu_opts_foreach(&qemu_object_opts,
2229                           user_creatable_add_opts_foreach,
2230                           qemu_img_object_print_help, &error_fatal)) {
2231         goto fail_getopt;
2232     }
2233 
2234     if (s.compressed && s.copy_range) {
2235         error_report("Cannot enable copy offloading when -c is used");
2236         goto fail_getopt;
2237     }
2238 
2239     if (explict_min_sparse && s.copy_range) {
2240         error_report("Cannot enable copy offloading when -S is used");
2241         goto fail_getopt;
2242     }
2243 
2244     if (s.copy_range && s.salvage) {
2245         error_report("Cannot use copy offloading in salvaging mode");
2246         goto fail_getopt;
2247     }
2248 
2249     if (tgt_image_opts && !skip_create) {
2250         error_report("--target-image-opts requires use of -n flag");
2251         goto fail_getopt;
2252     }
2253 
2254     if (skip_create && options) {
2255         warn_report("-o has no effect when skipping image creation");
2256         warn_report("This will become an error in future QEMU versions.");
2257     }
2258 
2259     if (s.has_zero_init && !skip_create) {
2260         error_report("--target-is-zero requires use of -n flag");
2261         goto fail_getopt;
2262     }
2263 
2264     s.src_num = argc - optind - 1;
2265     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2266 
2267     if (options && has_help_option(options)) {
2268         if (out_fmt) {
2269             ret = print_block_option_help(out_filename, out_fmt);
2270             goto fail_getopt;
2271         } else {
2272             error_report("Option help requires a format be specified");
2273             goto fail_getopt;
2274         }
2275     }
2276 
2277     if (s.src_num < 1) {
2278         error_report("Must specify image file name");
2279         goto fail_getopt;
2280     }
2281 
2282 
2283     /* ret is still -EINVAL until here */
2284     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2285     if (ret < 0) {
2286         error_report("Invalid source cache option: %s", src_cache);
2287         goto fail_getopt;
2288     }
2289 
2290     /* Initialize before goto out */
2291     if (s.quiet) {
2292         progress = false;
2293     }
2294     qemu_progress_init(progress, 1.0);
2295     qemu_progress_print(0, 100);
2296 
2297     s.src = g_new0(BlockBackend *, s.src_num);
2298     s.src_sectors = g_new(int64_t, s.src_num);
2299 
2300     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2301         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2302                                fmt, src_flags, src_writethrough, s.quiet,
2303                                force_share);
2304         if (!s.src[bs_i]) {
2305             ret = -1;
2306             goto out;
2307         }
2308         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2309         if (s.src_sectors[bs_i] < 0) {
2310             error_report("Could not get size of %s: %s",
2311                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2312             ret = -1;
2313             goto out;
2314         }
2315         s.total_sectors += s.src_sectors[bs_i];
2316     }
2317 
2318     if (sn_opts) {
2319         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2320                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2321                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2322                                &local_err);
2323     } else if (snapshot_name != NULL) {
2324         if (s.src_num > 1) {
2325             error_report("No support for concatenating multiple snapshot");
2326             ret = -1;
2327             goto out;
2328         }
2329 
2330         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2331                                              &local_err);
2332     }
2333     if (local_err) {
2334         error_reportf_err(local_err, "Failed to load snapshot: ");
2335         ret = -1;
2336         goto out;
2337     }
2338 
2339     if (!skip_create) {
2340         /* Find driver and parse its options */
2341         drv = bdrv_find_format(out_fmt);
2342         if (!drv) {
2343             error_report("Unknown file format '%s'", out_fmt);
2344             ret = -1;
2345             goto out;
2346         }
2347 
2348         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2349         if (!proto_drv) {
2350             error_report_err(local_err);
2351             ret = -1;
2352             goto out;
2353         }
2354 
2355         if (!drv->create_opts) {
2356             error_report("Format driver '%s' does not support image creation",
2357                          drv->format_name);
2358             ret = -1;
2359             goto out;
2360         }
2361 
2362         if (!proto_drv->create_opts) {
2363             error_report("Protocol driver '%s' does not support image creation",
2364                          proto_drv->format_name);
2365             ret = -1;
2366             goto out;
2367         }
2368 
2369         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2370         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2371 
2372         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2373         if (options) {
2374             qemu_opts_do_parse(opts, options, NULL, &local_err);
2375             if (local_err) {
2376                 error_report_err(local_err);
2377                 ret = -1;
2378                 goto out;
2379             }
2380         }
2381 
2382         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s.total_sectors * 512,
2383                             &error_abort);
2384         ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
2385         if (ret < 0) {
2386             goto out;
2387         }
2388     }
2389 
2390     /* Get backing file name if -o backing_file was used */
2391     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2392     if (out_baseimg_param) {
2393         out_baseimg = out_baseimg_param;
2394     }
2395     s.target_has_backing = (bool) out_baseimg;
2396 
2397     if (s.has_zero_init && s.target_has_backing) {
2398         error_report("Cannot use --target-is-zero when the destination "
2399                      "image has a backing file");
2400         goto out;
2401     }
2402 
2403     if (s.src_num > 1 && out_baseimg) {
2404         error_report("Having a backing file for the target makes no sense when "
2405                      "concatenating multiple input images");
2406         ret = -1;
2407         goto out;
2408     }
2409 
2410     /* Check if compression is supported */
2411     if (s.compressed) {
2412         bool encryption =
2413             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2414         const char *encryptfmt =
2415             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2416         const char *preallocation =
2417             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2418 
2419         if (drv && !block_driver_can_compress(drv)) {
2420             error_report("Compression not supported for this file format");
2421             ret = -1;
2422             goto out;
2423         }
2424 
2425         if (encryption || encryptfmt) {
2426             error_report("Compression and encryption not supported at "
2427                          "the same time");
2428             ret = -1;
2429             goto out;
2430         }
2431 
2432         if (preallocation
2433             && strcmp(preallocation, "off"))
2434         {
2435             error_report("Compression and preallocation not supported at "
2436                          "the same time");
2437             ret = -1;
2438             goto out;
2439         }
2440     }
2441 
2442     /*
2443      * The later open call will need any decryption secrets, and
2444      * bdrv_create() will purge "opts", so extract them now before
2445      * they are lost.
2446      */
2447     if (!skip_create) {
2448         open_opts = qdict_new();
2449         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2450     }
2451 
2452     if (!skip_create) {
2453         /* Create the new image */
2454         ret = bdrv_create(drv, out_filename, opts, &local_err);
2455         if (ret < 0) {
2456             error_reportf_err(local_err, "%s: error while converting %s: ",
2457                               out_filename, out_fmt);
2458             goto out;
2459         }
2460     }
2461 
2462     s.target_is_new = !skip_create;
2463 
2464     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2465     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2466     if (ret < 0) {
2467         error_report("Invalid cache option: %s", cache);
2468         goto out;
2469     }
2470 
2471     if (skip_create) {
2472         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2473                             flags, writethrough, s.quiet, false);
2474     } else {
2475         /* TODO ultimately we should allow --target-image-opts
2476          * to be used even when -n is not given.
2477          * That has to wait for bdrv_create to be improved
2478          * to allow filenames in option syntax
2479          */
2480         s.target = img_open_file(out_filename, open_opts, out_fmt,
2481                                  flags, writethrough, s.quiet, false);
2482         open_opts = NULL; /* blk_new_open will have freed it */
2483     }
2484     if (!s.target) {
2485         ret = -1;
2486         goto out;
2487     }
2488     out_bs = blk_bs(s.target);
2489 
2490     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2491         error_report("Compression not supported for this file format");
2492         ret = -1;
2493         goto out;
2494     }
2495 
2496     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2497      * or discard_alignment of the out_bs is greater. Limit to
2498      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2499     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2500                         MAX(s.buf_sectors,
2501                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2502                                 out_bs->bl.pdiscard_alignment >>
2503                                 BDRV_SECTOR_BITS)));
2504 
2505     /* try to align the write requests to the destination to avoid unnecessary
2506      * RMW cycles. */
2507     s.alignment = MAX(pow2floor(s.min_sparse),
2508                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2509                                    BDRV_SECTOR_SIZE));
2510     assert(is_power_of_2(s.alignment));
2511 
2512     if (skip_create) {
2513         int64_t output_sectors = blk_nb_sectors(s.target);
2514         if (output_sectors < 0) {
2515             error_report("unable to get output image length: %s",
2516                          strerror(-output_sectors));
2517             ret = -1;
2518             goto out;
2519         } else if (output_sectors < s.total_sectors) {
2520             error_report("output file is smaller than input file");
2521             ret = -1;
2522             goto out;
2523         }
2524     }
2525 
2526     if (s.target_has_backing && s.target_is_new) {
2527         /* Errors are treated as "backing length unknown" (which means
2528          * s.target_backing_sectors has to be negative, which it will
2529          * be automatically).  The backing file length is used only
2530          * for optimizations, so such a case is not fatal. */
2531         s.target_backing_sectors = bdrv_nb_sectors(out_bs->backing->bs);
2532     } else {
2533         s.target_backing_sectors = -1;
2534     }
2535 
2536     ret = bdrv_get_info(out_bs, &bdi);
2537     if (ret < 0) {
2538         if (s.compressed) {
2539             error_report("could not get block driver info");
2540             goto out;
2541         }
2542     } else {
2543         s.compressed = s.compressed || bdi.needs_compressed_writes;
2544         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2545         s.unallocated_blocks_are_zero = bdi.unallocated_blocks_are_zero;
2546     }
2547 
2548     ret = convert_do_copy(&s);
2549 out:
2550     if (!ret) {
2551         qemu_progress_print(100, 0);
2552     }
2553     qemu_progress_end();
2554     qemu_opts_del(opts);
2555     qemu_opts_free(create_opts);
2556     qemu_opts_del(sn_opts);
2557     qobject_unref(open_opts);
2558     blk_unref(s.target);
2559     if (s.src) {
2560         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2561             blk_unref(s.src[bs_i]);
2562         }
2563         g_free(s.src);
2564     }
2565     g_free(s.src_sectors);
2566 fail_getopt:
2567     g_free(options);
2568 
2569     return !!ret;
2570 }
2571 
2572 
2573 static void dump_snapshots(BlockDriverState *bs)
2574 {
2575     QEMUSnapshotInfo *sn_tab, *sn;
2576     int nb_sns, i;
2577 
2578     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2579     if (nb_sns <= 0)
2580         return;
2581     printf("Snapshot list:\n");
2582     bdrv_snapshot_dump(NULL);
2583     printf("\n");
2584     for(i = 0; i < nb_sns; i++) {
2585         sn = &sn_tab[i];
2586         bdrv_snapshot_dump(sn);
2587         printf("\n");
2588     }
2589     g_free(sn_tab);
2590 }
2591 
2592 static void dump_json_image_info_list(ImageInfoList *list)
2593 {
2594     QString *str;
2595     QObject *obj;
2596     Visitor *v = qobject_output_visitor_new(&obj);
2597 
2598     visit_type_ImageInfoList(v, NULL, &list, &error_abort);
2599     visit_complete(v, &obj);
2600     str = qobject_to_json_pretty(obj);
2601     assert(str != NULL);
2602     printf("%s\n", qstring_get_str(str));
2603     qobject_unref(obj);
2604     visit_free(v);
2605     qobject_unref(str);
2606 }
2607 
2608 static void dump_json_image_info(ImageInfo *info)
2609 {
2610     QString *str;
2611     QObject *obj;
2612     Visitor *v = qobject_output_visitor_new(&obj);
2613 
2614     visit_type_ImageInfo(v, NULL, &info, &error_abort);
2615     visit_complete(v, &obj);
2616     str = qobject_to_json_pretty(obj);
2617     assert(str != NULL);
2618     printf("%s\n", qstring_get_str(str));
2619     qobject_unref(obj);
2620     visit_free(v);
2621     qobject_unref(str);
2622 }
2623 
2624 static void dump_human_image_info_list(ImageInfoList *list)
2625 {
2626     ImageInfoList *elem;
2627     bool delim = false;
2628 
2629     for (elem = list; elem; elem = elem->next) {
2630         if (delim) {
2631             printf("\n");
2632         }
2633         delim = true;
2634 
2635         bdrv_image_info_dump(elem->value);
2636     }
2637 }
2638 
2639 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2640 {
2641     return strcmp(a, b) == 0;
2642 }
2643 
2644 /**
2645  * Open an image file chain and return an ImageInfoList
2646  *
2647  * @filename: topmost image filename
2648  * @fmt: topmost image format (may be NULL to autodetect)
2649  * @chain: true  - enumerate entire backing file chain
2650  *         false - only topmost image file
2651  *
2652  * Returns a list of ImageInfo objects or NULL if there was an error opening an
2653  * image file.  If there was an error a message will have been printed to
2654  * stderr.
2655  */
2656 static ImageInfoList *collect_image_info_list(bool image_opts,
2657                                               const char *filename,
2658                                               const char *fmt,
2659                                               bool chain, bool force_share)
2660 {
2661     ImageInfoList *head = NULL;
2662     ImageInfoList **last = &head;
2663     GHashTable *filenames;
2664     Error *err = NULL;
2665 
2666     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2667 
2668     while (filename) {
2669         BlockBackend *blk;
2670         BlockDriverState *bs;
2671         ImageInfo *info;
2672         ImageInfoList *elem;
2673 
2674         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2675             error_report("Backing file '%s' creates an infinite loop.",
2676                          filename);
2677             goto err;
2678         }
2679         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2680 
2681         blk = img_open(image_opts, filename, fmt,
2682                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
2683                        force_share);
2684         if (!blk) {
2685             goto err;
2686         }
2687         bs = blk_bs(blk);
2688 
2689         bdrv_query_image_info(bs, &info, &err);
2690         if (err) {
2691             error_report_err(err);
2692             blk_unref(blk);
2693             goto err;
2694         }
2695 
2696         elem = g_new0(ImageInfoList, 1);
2697         elem->value = info;
2698         *last = elem;
2699         last = &elem->next;
2700 
2701         blk_unref(blk);
2702 
2703         /* Clear parameters that only apply to the topmost image */
2704         filename = fmt = NULL;
2705         image_opts = false;
2706 
2707         if (chain) {
2708             if (info->has_full_backing_filename) {
2709                 filename = info->full_backing_filename;
2710             } else if (info->has_backing_filename) {
2711                 error_report("Could not determine absolute backing filename,"
2712                              " but backing filename '%s' present",
2713                              info->backing_filename);
2714                 goto err;
2715             }
2716             if (info->has_backing_filename_format) {
2717                 fmt = info->backing_filename_format;
2718             }
2719         }
2720     }
2721     g_hash_table_destroy(filenames);
2722     return head;
2723 
2724 err:
2725     qapi_free_ImageInfoList(head);
2726     g_hash_table_destroy(filenames);
2727     return NULL;
2728 }
2729 
2730 static int img_info(int argc, char **argv)
2731 {
2732     int c;
2733     OutputFormat output_format = OFORMAT_HUMAN;
2734     bool chain = false;
2735     const char *filename, *fmt, *output;
2736     ImageInfoList *list;
2737     bool image_opts = false;
2738     bool force_share = false;
2739 
2740     fmt = NULL;
2741     output = NULL;
2742     for(;;) {
2743         int option_index = 0;
2744         static const struct option long_options[] = {
2745             {"help", no_argument, 0, 'h'},
2746             {"format", required_argument, 0, 'f'},
2747             {"output", required_argument, 0, OPTION_OUTPUT},
2748             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
2749             {"object", required_argument, 0, OPTION_OBJECT},
2750             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2751             {"force-share", no_argument, 0, 'U'},
2752             {0, 0, 0, 0}
2753         };
2754         c = getopt_long(argc, argv, ":f:hU",
2755                         long_options, &option_index);
2756         if (c == -1) {
2757             break;
2758         }
2759         switch(c) {
2760         case ':':
2761             missing_argument(argv[optind - 1]);
2762             break;
2763         case '?':
2764             unrecognized_option(argv[optind - 1]);
2765             break;
2766         case 'h':
2767             help();
2768             break;
2769         case 'f':
2770             fmt = optarg;
2771             break;
2772         case 'U':
2773             force_share = true;
2774             break;
2775         case OPTION_OUTPUT:
2776             output = optarg;
2777             break;
2778         case OPTION_BACKING_CHAIN:
2779             chain = true;
2780             break;
2781         case OPTION_OBJECT: {
2782             QemuOpts *opts;
2783             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2784                                            optarg, true);
2785             if (!opts) {
2786                 return 1;
2787             }
2788         }   break;
2789         case OPTION_IMAGE_OPTS:
2790             image_opts = true;
2791             break;
2792         }
2793     }
2794     if (optind != argc - 1) {
2795         error_exit("Expecting one image file name");
2796     }
2797     filename = argv[optind++];
2798 
2799     if (output && !strcmp(output, "json")) {
2800         output_format = OFORMAT_JSON;
2801     } else if (output && !strcmp(output, "human")) {
2802         output_format = OFORMAT_HUMAN;
2803     } else if (output) {
2804         error_report("--output must be used with human or json as argument.");
2805         return 1;
2806     }
2807 
2808     if (qemu_opts_foreach(&qemu_object_opts,
2809                           user_creatable_add_opts_foreach,
2810                           qemu_img_object_print_help, &error_fatal)) {
2811         return 1;
2812     }
2813 
2814     list = collect_image_info_list(image_opts, filename, fmt, chain,
2815                                    force_share);
2816     if (!list) {
2817         return 1;
2818     }
2819 
2820     switch (output_format) {
2821     case OFORMAT_HUMAN:
2822         dump_human_image_info_list(list);
2823         break;
2824     case OFORMAT_JSON:
2825         if (chain) {
2826             dump_json_image_info_list(list);
2827         } else {
2828             dump_json_image_info(list->value);
2829         }
2830         break;
2831     }
2832 
2833     qapi_free_ImageInfoList(list);
2834     return 0;
2835 }
2836 
2837 static int dump_map_entry(OutputFormat output_format, MapEntry *e,
2838                           MapEntry *next)
2839 {
2840     switch (output_format) {
2841     case OFORMAT_HUMAN:
2842         if (e->data && !e->has_offset) {
2843             error_report("File contains external, encrypted or compressed clusters.");
2844             return -1;
2845         }
2846         if (e->data && !e->zero) {
2847             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
2848                    e->start, e->length,
2849                    e->has_offset ? e->offset : 0,
2850                    e->has_filename ? e->filename : "");
2851         }
2852         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
2853          * Modify the flags here to allow more coalescing.
2854          */
2855         if (next && (!next->data || next->zero)) {
2856             next->data = false;
2857             next->zero = true;
2858         }
2859         break;
2860     case OFORMAT_JSON:
2861         printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
2862                " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
2863                (e->start == 0 ? "[" : ",\n"),
2864                e->start, e->length, e->depth,
2865                e->zero ? "true" : "false",
2866                e->data ? "true" : "false");
2867         if (e->has_offset) {
2868             printf(", \"offset\": %"PRId64"", e->offset);
2869         }
2870         putchar('}');
2871 
2872         if (!next) {
2873             printf("]\n");
2874         }
2875         break;
2876     }
2877     return 0;
2878 }
2879 
2880 static int get_block_status(BlockDriverState *bs, int64_t offset,
2881                             int64_t bytes, MapEntry *e)
2882 {
2883     int ret;
2884     int depth;
2885     BlockDriverState *file;
2886     bool has_offset;
2887     int64_t map;
2888     char *filename = NULL;
2889 
2890     /* As an optimization, we could cache the current range of unallocated
2891      * clusters in each file of the chain, and avoid querying the same
2892      * range repeatedly.
2893      */
2894 
2895     depth = 0;
2896     for (;;) {
2897         ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
2898         if (ret < 0) {
2899             return ret;
2900         }
2901         assert(bytes);
2902         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
2903             break;
2904         }
2905         bs = backing_bs(bs);
2906         if (bs == NULL) {
2907             ret = 0;
2908             break;
2909         }
2910 
2911         depth++;
2912     }
2913 
2914     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
2915 
2916     if (file && has_offset) {
2917         bdrv_refresh_filename(file);
2918         filename = file->filename;
2919     }
2920 
2921     *e = (MapEntry) {
2922         .start = offset,
2923         .length = bytes,
2924         .data = !!(ret & BDRV_BLOCK_DATA),
2925         .zero = !!(ret & BDRV_BLOCK_ZERO),
2926         .offset = map,
2927         .has_offset = has_offset,
2928         .depth = depth,
2929         .has_filename = filename,
2930         .filename = filename,
2931     };
2932 
2933     return 0;
2934 }
2935 
2936 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
2937 {
2938     if (curr->length == 0) {
2939         return false;
2940     }
2941     if (curr->zero != next->zero ||
2942         curr->data != next->data ||
2943         curr->depth != next->depth ||
2944         curr->has_filename != next->has_filename ||
2945         curr->has_offset != next->has_offset) {
2946         return false;
2947     }
2948     if (curr->has_filename && strcmp(curr->filename, next->filename)) {
2949         return false;
2950     }
2951     if (curr->has_offset && curr->offset + curr->length != next->offset) {
2952         return false;
2953     }
2954     return true;
2955 }
2956 
2957 static int img_map(int argc, char **argv)
2958 {
2959     int c;
2960     OutputFormat output_format = OFORMAT_HUMAN;
2961     BlockBackend *blk;
2962     BlockDriverState *bs;
2963     const char *filename, *fmt, *output;
2964     int64_t length;
2965     MapEntry curr = { .length = 0 }, next;
2966     int ret = 0;
2967     bool image_opts = false;
2968     bool force_share = false;
2969 
2970     fmt = NULL;
2971     output = NULL;
2972     for (;;) {
2973         int option_index = 0;
2974         static const struct option long_options[] = {
2975             {"help", no_argument, 0, 'h'},
2976             {"format", required_argument, 0, 'f'},
2977             {"output", required_argument, 0, OPTION_OUTPUT},
2978             {"object", required_argument, 0, OPTION_OBJECT},
2979             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2980             {"force-share", no_argument, 0, 'U'},
2981             {0, 0, 0, 0}
2982         };
2983         c = getopt_long(argc, argv, ":f:hU",
2984                         long_options, &option_index);
2985         if (c == -1) {
2986             break;
2987         }
2988         switch (c) {
2989         case ':':
2990             missing_argument(argv[optind - 1]);
2991             break;
2992         case '?':
2993             unrecognized_option(argv[optind - 1]);
2994             break;
2995         case 'h':
2996             help();
2997             break;
2998         case 'f':
2999             fmt = optarg;
3000             break;
3001         case 'U':
3002             force_share = true;
3003             break;
3004         case OPTION_OUTPUT:
3005             output = optarg;
3006             break;
3007         case OPTION_OBJECT: {
3008             QemuOpts *opts;
3009             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3010                                            optarg, true);
3011             if (!opts) {
3012                 return 1;
3013             }
3014         }   break;
3015         case OPTION_IMAGE_OPTS:
3016             image_opts = true;
3017             break;
3018         }
3019     }
3020     if (optind != argc - 1) {
3021         error_exit("Expecting one image file name");
3022     }
3023     filename = argv[optind];
3024 
3025     if (output && !strcmp(output, "json")) {
3026         output_format = OFORMAT_JSON;
3027     } else if (output && !strcmp(output, "human")) {
3028         output_format = OFORMAT_HUMAN;
3029     } else if (output) {
3030         error_report("--output must be used with human or json as argument.");
3031         return 1;
3032     }
3033 
3034     if (qemu_opts_foreach(&qemu_object_opts,
3035                           user_creatable_add_opts_foreach,
3036                           qemu_img_object_print_help, &error_fatal)) {
3037         return 1;
3038     }
3039 
3040     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3041     if (!blk) {
3042         return 1;
3043     }
3044     bs = blk_bs(blk);
3045 
3046     if (output_format == OFORMAT_HUMAN) {
3047         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3048     }
3049 
3050     length = blk_getlength(blk);
3051     while (curr.start + curr.length < length) {
3052         int64_t offset = curr.start + curr.length;
3053         int64_t n;
3054 
3055         /* Probe up to 1 GiB at a time.  */
3056         n = MIN(1 * GiB, length - offset);
3057         ret = get_block_status(bs, offset, n, &next);
3058 
3059         if (ret < 0) {
3060             error_report("Could not read file metadata: %s", strerror(-ret));
3061             goto out;
3062         }
3063 
3064         if (entry_mergeable(&curr, &next)) {
3065             curr.length += next.length;
3066             continue;
3067         }
3068 
3069         if (curr.length > 0) {
3070             ret = dump_map_entry(output_format, &curr, &next);
3071             if (ret < 0) {
3072                 goto out;
3073             }
3074         }
3075         curr = next;
3076     }
3077 
3078     ret = dump_map_entry(output_format, &curr, NULL);
3079 
3080 out:
3081     blk_unref(blk);
3082     return ret < 0;
3083 }
3084 
3085 #define SNAPSHOT_LIST   1
3086 #define SNAPSHOT_CREATE 2
3087 #define SNAPSHOT_APPLY  3
3088 #define SNAPSHOT_DELETE 4
3089 
3090 static int img_snapshot(int argc, char **argv)
3091 {
3092     BlockBackend *blk;
3093     BlockDriverState *bs;
3094     QEMUSnapshotInfo sn;
3095     char *filename, *snapshot_name = NULL;
3096     int c, ret = 0, bdrv_oflags;
3097     int action = 0;
3098     qemu_timeval tv;
3099     bool quiet = false;
3100     Error *err = NULL;
3101     bool image_opts = false;
3102     bool force_share = false;
3103 
3104     bdrv_oflags = BDRV_O_RDWR;
3105     /* Parse commandline parameters */
3106     for(;;) {
3107         static const struct option long_options[] = {
3108             {"help", no_argument, 0, 'h'},
3109             {"object", required_argument, 0, OPTION_OBJECT},
3110             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3111             {"force-share", no_argument, 0, 'U'},
3112             {0, 0, 0, 0}
3113         };
3114         c = getopt_long(argc, argv, ":la:c:d:hqU",
3115                         long_options, NULL);
3116         if (c == -1) {
3117             break;
3118         }
3119         switch(c) {
3120         case ':':
3121             missing_argument(argv[optind - 1]);
3122             break;
3123         case '?':
3124             unrecognized_option(argv[optind - 1]);
3125             break;
3126         case 'h':
3127             help();
3128             return 0;
3129         case 'l':
3130             if (action) {
3131                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3132                 return 0;
3133             }
3134             action = SNAPSHOT_LIST;
3135             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
3136             break;
3137         case 'a':
3138             if (action) {
3139                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3140                 return 0;
3141             }
3142             action = SNAPSHOT_APPLY;
3143             snapshot_name = optarg;
3144             break;
3145         case 'c':
3146             if (action) {
3147                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3148                 return 0;
3149             }
3150             action = SNAPSHOT_CREATE;
3151             snapshot_name = optarg;
3152             break;
3153         case 'd':
3154             if (action) {
3155                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3156                 return 0;
3157             }
3158             action = SNAPSHOT_DELETE;
3159             snapshot_name = optarg;
3160             break;
3161         case 'q':
3162             quiet = true;
3163             break;
3164         case 'U':
3165             force_share = true;
3166             break;
3167         case OPTION_OBJECT: {
3168             QemuOpts *opts;
3169             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3170                                            optarg, true);
3171             if (!opts) {
3172                 return 1;
3173             }
3174         }   break;
3175         case OPTION_IMAGE_OPTS:
3176             image_opts = true;
3177             break;
3178         }
3179     }
3180 
3181     if (optind != argc - 1) {
3182         error_exit("Expecting one image file name");
3183     }
3184     filename = argv[optind++];
3185 
3186     if (qemu_opts_foreach(&qemu_object_opts,
3187                           user_creatable_add_opts_foreach,
3188                           qemu_img_object_print_help, &error_fatal)) {
3189         return 1;
3190     }
3191 
3192     /* Open the image */
3193     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
3194                    force_share);
3195     if (!blk) {
3196         return 1;
3197     }
3198     bs = blk_bs(blk);
3199 
3200     /* Perform the requested action */
3201     switch(action) {
3202     case SNAPSHOT_LIST:
3203         dump_snapshots(bs);
3204         break;
3205 
3206     case SNAPSHOT_CREATE:
3207         memset(&sn, 0, sizeof(sn));
3208         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3209 
3210         qemu_gettimeofday(&tv);
3211         sn.date_sec = tv.tv_sec;
3212         sn.date_nsec = tv.tv_usec * 1000;
3213 
3214         ret = bdrv_snapshot_create(bs, &sn);
3215         if (ret) {
3216             error_report("Could not create snapshot '%s': %d (%s)",
3217                 snapshot_name, ret, strerror(-ret));
3218         }
3219         break;
3220 
3221     case SNAPSHOT_APPLY:
3222         ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3223         if (ret) {
3224             error_reportf_err(err, "Could not apply snapshot '%s': ",
3225                               snapshot_name);
3226         }
3227         break;
3228 
3229     case SNAPSHOT_DELETE:
3230         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3231         if (ret < 0) {
3232             error_report("Could not delete snapshot '%s': snapshot not "
3233                          "found", snapshot_name);
3234             ret = 1;
3235         } else {
3236             ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3237             if (ret < 0) {
3238                 error_reportf_err(err, "Could not delete snapshot '%s': ",
3239                                   snapshot_name);
3240                 ret = 1;
3241             }
3242         }
3243         break;
3244     }
3245 
3246     /* Cleanup */
3247     blk_unref(blk);
3248     if (ret) {
3249         return 1;
3250     }
3251     return 0;
3252 }
3253 
3254 static int img_rebase(int argc, char **argv)
3255 {
3256     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3257     uint8_t *buf_old = NULL;
3258     uint8_t *buf_new = NULL;
3259     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3260     char *filename;
3261     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3262     int c, flags, src_flags, ret;
3263     bool writethrough, src_writethrough;
3264     int unsafe = 0;
3265     bool force_share = false;
3266     int progress = 0;
3267     bool quiet = false;
3268     Error *local_err = NULL;
3269     bool image_opts = false;
3270 
3271     /* Parse commandline parameters */
3272     fmt = NULL;
3273     cache = BDRV_DEFAULT_CACHE;
3274     src_cache = BDRV_DEFAULT_CACHE;
3275     out_baseimg = NULL;
3276     out_basefmt = NULL;
3277     for(;;) {
3278         static const struct option long_options[] = {
3279             {"help", no_argument, 0, 'h'},
3280             {"object", required_argument, 0, OPTION_OBJECT},
3281             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3282             {"force-share", no_argument, 0, 'U'},
3283             {0, 0, 0, 0}
3284         };
3285         c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
3286                         long_options, NULL);
3287         if (c == -1) {
3288             break;
3289         }
3290         switch(c) {
3291         case ':':
3292             missing_argument(argv[optind - 1]);
3293             break;
3294         case '?':
3295             unrecognized_option(argv[optind - 1]);
3296             break;
3297         case 'h':
3298             help();
3299             return 0;
3300         case 'f':
3301             fmt = optarg;
3302             break;
3303         case 'F':
3304             out_basefmt = optarg;
3305             break;
3306         case 'b':
3307             out_baseimg = optarg;
3308             break;
3309         case 'u':
3310             unsafe = 1;
3311             break;
3312         case 'p':
3313             progress = 1;
3314             break;
3315         case 't':
3316             cache = optarg;
3317             break;
3318         case 'T':
3319             src_cache = optarg;
3320             break;
3321         case 'q':
3322             quiet = true;
3323             break;
3324         case OPTION_OBJECT: {
3325             QemuOpts *opts;
3326             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3327                                            optarg, true);
3328             if (!opts) {
3329                 return 1;
3330             }
3331         }   break;
3332         case OPTION_IMAGE_OPTS:
3333             image_opts = true;
3334             break;
3335         case 'U':
3336             force_share = true;
3337             break;
3338         }
3339     }
3340 
3341     if (quiet) {
3342         progress = 0;
3343     }
3344 
3345     if (optind != argc - 1) {
3346         error_exit("Expecting one image file name");
3347     }
3348     if (!unsafe && !out_baseimg) {
3349         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3350     }
3351     filename = argv[optind++];
3352 
3353     if (qemu_opts_foreach(&qemu_object_opts,
3354                           user_creatable_add_opts_foreach,
3355                           qemu_img_object_print_help, &error_fatal)) {
3356         return 1;
3357     }
3358 
3359     qemu_progress_init(progress, 2.0);
3360     qemu_progress_print(0, 100);
3361 
3362     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3363     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3364     if (ret < 0) {
3365         error_report("Invalid cache option: %s", cache);
3366         goto out;
3367     }
3368 
3369     src_flags = 0;
3370     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3371     if (ret < 0) {
3372         error_report("Invalid source cache option: %s", src_cache);
3373         goto out;
3374     }
3375 
3376     /* The source files are opened read-only, don't care about WCE */
3377     assert((src_flags & BDRV_O_RDWR) == 0);
3378     (void) src_writethrough;
3379 
3380     /*
3381      * Open the images.
3382      *
3383      * Ignore the old backing file for unsafe rebase in case we want to correct
3384      * the reference to a renamed or moved backing file.
3385      */
3386     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3387                    false);
3388     if (!blk) {
3389         ret = -1;
3390         goto out;
3391     }
3392     bs = blk_bs(blk);
3393 
3394     if (out_basefmt != NULL) {
3395         if (bdrv_find_format(out_basefmt) == NULL) {
3396             error_report("Invalid format name: '%s'", out_basefmt);
3397             ret = -1;
3398             goto out;
3399         }
3400     }
3401 
3402     /* For safe rebasing we need to compare old and new backing file */
3403     if (!unsafe) {
3404         QDict *options = NULL;
3405         BlockDriverState *base_bs = backing_bs(bs);
3406 
3407         if (base_bs) {
3408             blk_old_backing = blk_new(qemu_get_aio_context(),
3409                                       BLK_PERM_CONSISTENT_READ,
3410                                       BLK_PERM_ALL);
3411             ret = blk_insert_bs(blk_old_backing, base_bs,
3412                                 &local_err);
3413             if (ret < 0) {
3414                 error_reportf_err(local_err,
3415                                   "Could not reuse old backing file '%s': ",
3416                                   base_bs->filename);
3417                 goto out;
3418             }
3419         } else {
3420             blk_old_backing = NULL;
3421         }
3422 
3423         if (out_baseimg[0]) {
3424             const char *overlay_filename;
3425             char *out_real_path;
3426 
3427             options = qdict_new();
3428             if (out_basefmt) {
3429                 qdict_put_str(options, "driver", out_basefmt);
3430             }
3431             if (force_share) {
3432                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3433             }
3434 
3435             bdrv_refresh_filename(bs);
3436             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3437                                                      : bs->filename;
3438             out_real_path =
3439                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
3440                                                              out_baseimg,
3441                                                              &local_err);
3442             if (local_err) {
3443                 qobject_unref(options);
3444                 error_reportf_err(local_err,
3445                                   "Could not resolve backing filename: ");
3446                 ret = -1;
3447                 goto out;
3448             }
3449 
3450             /*
3451              * Find out whether we rebase an image on top of a previous image
3452              * in its chain.
3453              */
3454             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3455             if (prefix_chain_bs) {
3456                 qobject_unref(options);
3457                 g_free(out_real_path);
3458 
3459                 blk_new_backing = blk_new(qemu_get_aio_context(),
3460                                           BLK_PERM_CONSISTENT_READ,
3461                                           BLK_PERM_ALL);
3462                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3463                                     &local_err);
3464                 if (ret < 0) {
3465                     error_reportf_err(local_err,
3466                                       "Could not reuse backing file '%s': ",
3467                                       out_baseimg);
3468                     goto out;
3469                 }
3470             } else {
3471                 blk_new_backing = blk_new_open(out_real_path, NULL,
3472                                                options, src_flags, &local_err);
3473                 g_free(out_real_path);
3474                 if (!blk_new_backing) {
3475                     error_reportf_err(local_err,
3476                                       "Could not open new backing file '%s': ",
3477                                       out_baseimg);
3478                     ret = -1;
3479                     goto out;
3480                 }
3481             }
3482         }
3483     }
3484 
3485     /*
3486      * Check each unallocated cluster in the COW file. If it is unallocated,
3487      * accesses go to the backing file. We must therefore compare this cluster
3488      * in the old and new backing file, and if they differ we need to copy it
3489      * from the old backing file into the COW file.
3490      *
3491      * If qemu-img crashes during this step, no harm is done. The content of
3492      * the image is the same as the original one at any time.
3493      */
3494     if (!unsafe) {
3495         int64_t size;
3496         int64_t old_backing_size = 0;
3497         int64_t new_backing_size = 0;
3498         uint64_t offset;
3499         int64_t n;
3500         float local_progress = 0;
3501 
3502         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3503         buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3504 
3505         size = blk_getlength(blk);
3506         if (size < 0) {
3507             error_report("Could not get size of '%s': %s",
3508                          filename, strerror(-size));
3509             ret = -1;
3510             goto out;
3511         }
3512         if (blk_old_backing) {
3513             old_backing_size = blk_getlength(blk_old_backing);
3514             if (old_backing_size < 0) {
3515                 char backing_name[PATH_MAX];
3516 
3517                 bdrv_get_backing_filename(bs, backing_name,
3518                                           sizeof(backing_name));
3519                 error_report("Could not get size of '%s': %s",
3520                              backing_name, strerror(-old_backing_size));
3521                 ret = -1;
3522                 goto out;
3523             }
3524         }
3525         if (blk_new_backing) {
3526             new_backing_size = blk_getlength(blk_new_backing);
3527             if (new_backing_size < 0) {
3528                 error_report("Could not get size of '%s': %s",
3529                              out_baseimg, strerror(-new_backing_size));
3530                 ret = -1;
3531                 goto out;
3532             }
3533         }
3534 
3535         if (size != 0) {
3536             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3537         }
3538 
3539         for (offset = 0; offset < size; offset += n) {
3540             bool buf_old_is_zero = false;
3541 
3542             /* How many bytes can we handle with the next read? */
3543             n = MIN(IO_BUF_SIZE, size - offset);
3544 
3545             /* If the cluster is allocated, we don't need to take action */
3546             ret = bdrv_is_allocated(bs, offset, n, &n);
3547             if (ret < 0) {
3548                 error_report("error while reading image metadata: %s",
3549                              strerror(-ret));
3550                 goto out;
3551             }
3552             if (ret) {
3553                 continue;
3554             }
3555 
3556             if (prefix_chain_bs) {
3557                 /*
3558                  * If cluster wasn't changed since prefix_chain, we don't need
3559                  * to take action
3560                  */
3561                 ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
3562                                               false, offset, n, &n);
3563                 if (ret < 0) {
3564                     error_report("error while reading image metadata: %s",
3565                                  strerror(-ret));
3566                     goto out;
3567                 }
3568                 if (!ret) {
3569                     continue;
3570                 }
3571             }
3572 
3573             /*
3574              * Read old and new backing file and take into consideration that
3575              * backing files may be smaller than the COW image.
3576              */
3577             if (offset >= old_backing_size) {
3578                 memset(buf_old, 0, n);
3579                 buf_old_is_zero = true;
3580             } else {
3581                 if (offset + n > old_backing_size) {
3582                     n = old_backing_size - offset;
3583                 }
3584 
3585                 ret = blk_pread(blk_old_backing, offset, buf_old, n);
3586                 if (ret < 0) {
3587                     error_report("error while reading from old backing file");
3588                     goto out;
3589                 }
3590             }
3591 
3592             if (offset >= new_backing_size || !blk_new_backing) {
3593                 memset(buf_new, 0, n);
3594             } else {
3595                 if (offset + n > new_backing_size) {
3596                     n = new_backing_size - offset;
3597                 }
3598 
3599                 ret = blk_pread(blk_new_backing, offset, buf_new, n);
3600                 if (ret < 0) {
3601                     error_report("error while reading from new backing file");
3602                     goto out;
3603                 }
3604             }
3605 
3606             /* If they differ, we need to write to the COW file */
3607             uint64_t written = 0;
3608 
3609             while (written < n) {
3610                 int64_t pnum;
3611 
3612                 if (compare_buffers(buf_old + written, buf_new + written,
3613                                     n - written, &pnum))
3614                 {
3615                     if (buf_old_is_zero) {
3616                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
3617                     } else {
3618                         ret = blk_pwrite(blk, offset + written,
3619                                          buf_old + written, pnum, 0);
3620                     }
3621                     if (ret < 0) {
3622                         error_report("Error while writing to COW image: %s",
3623                             strerror(-ret));
3624                         goto out;
3625                     }
3626                 }
3627 
3628                 written += pnum;
3629             }
3630             qemu_progress_print(local_progress, 100);
3631         }
3632     }
3633 
3634     /*
3635      * Change the backing file. All clusters that are different from the old
3636      * backing file are overwritten in the COW file now, so the visible content
3637      * doesn't change when we switch the backing file.
3638      */
3639     if (out_baseimg && *out_baseimg) {
3640         ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
3641     } else {
3642         ret = bdrv_change_backing_file(bs, NULL, NULL);
3643     }
3644 
3645     if (ret == -ENOSPC) {
3646         error_report("Could not change the backing file to '%s': No "
3647                      "space left in the file header", out_baseimg);
3648     } else if (ret < 0) {
3649         error_report("Could not change the backing file to '%s': %s",
3650             out_baseimg, strerror(-ret));
3651     }
3652 
3653     qemu_progress_print(100, 0);
3654     /*
3655      * TODO At this point it is possible to check if any clusters that are
3656      * allocated in the COW file are the same in the backing file. If so, they
3657      * could be dropped from the COW file. Don't do this before switching the
3658      * backing file, in case of a crash this would lead to corruption.
3659      */
3660 out:
3661     qemu_progress_end();
3662     /* Cleanup */
3663     if (!unsafe) {
3664         blk_unref(blk_old_backing);
3665         blk_unref(blk_new_backing);
3666     }
3667     qemu_vfree(buf_old);
3668     qemu_vfree(buf_new);
3669 
3670     blk_unref(blk);
3671     if (ret) {
3672         return 1;
3673     }
3674     return 0;
3675 }
3676 
3677 static int img_resize(int argc, char **argv)
3678 {
3679     Error *err = NULL;
3680     int c, ret, relative;
3681     const char *filename, *fmt, *size;
3682     int64_t n, total_size, current_size;
3683     bool quiet = false;
3684     BlockBackend *blk = NULL;
3685     PreallocMode prealloc = PREALLOC_MODE_OFF;
3686     QemuOpts *param;
3687 
3688     static QemuOptsList resize_options = {
3689         .name = "resize_options",
3690         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3691         .desc = {
3692             {
3693                 .name = BLOCK_OPT_SIZE,
3694                 .type = QEMU_OPT_SIZE,
3695                 .help = "Virtual disk size"
3696             }, {
3697                 /* end of list */
3698             }
3699         },
3700     };
3701     bool image_opts = false;
3702     bool shrink = false;
3703 
3704     /* Remove size from argv manually so that negative numbers are not treated
3705      * as options by getopt. */
3706     if (argc < 3) {
3707         error_exit("Not enough arguments");
3708         return 1;
3709     }
3710 
3711     size = argv[--argc];
3712 
3713     /* Parse getopt arguments */
3714     fmt = NULL;
3715     for(;;) {
3716         static const struct option long_options[] = {
3717             {"help", no_argument, 0, 'h'},
3718             {"object", required_argument, 0, OPTION_OBJECT},
3719             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3720             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
3721             {"shrink", no_argument, 0, OPTION_SHRINK},
3722             {0, 0, 0, 0}
3723         };
3724         c = getopt_long(argc, argv, ":f:hq",
3725                         long_options, NULL);
3726         if (c == -1) {
3727             break;
3728         }
3729         switch(c) {
3730         case ':':
3731             missing_argument(argv[optind - 1]);
3732             break;
3733         case '?':
3734             unrecognized_option(argv[optind - 1]);
3735             break;
3736         case 'h':
3737             help();
3738             break;
3739         case 'f':
3740             fmt = optarg;
3741             break;
3742         case 'q':
3743             quiet = true;
3744             break;
3745         case OPTION_OBJECT: {
3746             QemuOpts *opts;
3747             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3748                                            optarg, true);
3749             if (!opts) {
3750                 return 1;
3751             }
3752         }   break;
3753         case OPTION_IMAGE_OPTS:
3754             image_opts = true;
3755             break;
3756         case OPTION_PREALLOCATION:
3757             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
3758                                        PREALLOC_MODE__MAX, NULL);
3759             if (prealloc == PREALLOC_MODE__MAX) {
3760                 error_report("Invalid preallocation mode '%s'", optarg);
3761                 return 1;
3762             }
3763             break;
3764         case OPTION_SHRINK:
3765             shrink = true;
3766             break;
3767         }
3768     }
3769     if (optind != argc - 1) {
3770         error_exit("Expecting image file name and size");
3771     }
3772     filename = argv[optind++];
3773 
3774     if (qemu_opts_foreach(&qemu_object_opts,
3775                           user_creatable_add_opts_foreach,
3776                           qemu_img_object_print_help, &error_fatal)) {
3777         return 1;
3778     }
3779 
3780     /* Choose grow, shrink, or absolute resize mode */
3781     switch (size[0]) {
3782     case '+':
3783         relative = 1;
3784         size++;
3785         break;
3786     case '-':
3787         relative = -1;
3788         size++;
3789         break;
3790     default:
3791         relative = 0;
3792         break;
3793     }
3794 
3795     /* Parse size */
3796     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
3797     qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
3798     if (err) {
3799         error_report_err(err);
3800         ret = -1;
3801         qemu_opts_del(param);
3802         goto out;
3803     }
3804     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
3805     qemu_opts_del(param);
3806 
3807     blk = img_open(image_opts, filename, fmt,
3808                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
3809                    false);
3810     if (!blk) {
3811         ret = -1;
3812         goto out;
3813     }
3814 
3815     current_size = blk_getlength(blk);
3816     if (current_size < 0) {
3817         error_report("Failed to inquire current image length: %s",
3818                      strerror(-current_size));
3819         ret = -1;
3820         goto out;
3821     }
3822 
3823     if (relative) {
3824         total_size = current_size + n * relative;
3825     } else {
3826         total_size = n;
3827     }
3828     if (total_size <= 0) {
3829         error_report("New image size must be positive");
3830         ret = -1;
3831         goto out;
3832     }
3833 
3834     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
3835         error_report("Preallocation can only be used for growing images");
3836         ret = -1;
3837         goto out;
3838     }
3839 
3840     if (total_size < current_size && !shrink) {
3841         warn_report("Shrinking an image will delete all data beyond the "
3842                     "shrunken image's end. Before performing such an "
3843                     "operation, make sure there is no important data there.");
3844 
3845         if (g_strcmp0(bdrv_get_format_name(blk_bs(blk)), "raw") != 0) {
3846             error_report(
3847               "Use the --shrink option to perform a shrink operation.");
3848             ret = -1;
3849             goto out;
3850         } else {
3851             warn_report("Using the --shrink option will suppress this message. "
3852                         "Note that future versions of qemu-img may refuse to "
3853                         "shrink images without this option.");
3854         }
3855     }
3856 
3857     /*
3858      * The user expects the image to have the desired size after
3859      * resizing, so pass @exact=true.  It is of no use to report
3860      * success when the image has not actually been resized.
3861      */
3862     ret = blk_truncate(blk, total_size, true, prealloc, &err);
3863     if (!ret) {
3864         qprintf(quiet, "Image resized.\n");
3865     } else {
3866         error_report_err(err);
3867     }
3868 out:
3869     blk_unref(blk);
3870     if (ret) {
3871         return 1;
3872     }
3873     return 0;
3874 }
3875 
3876 static void amend_status_cb(BlockDriverState *bs,
3877                             int64_t offset, int64_t total_work_size,
3878                             void *opaque)
3879 {
3880     qemu_progress_print(100.f * offset / total_work_size, 0);
3881 }
3882 
3883 static int print_amend_option_help(const char *format)
3884 {
3885     BlockDriver *drv;
3886 
3887     /* Find driver and parse its options */
3888     drv = bdrv_find_format(format);
3889     if (!drv) {
3890         error_report("Unknown file format '%s'", format);
3891         return 1;
3892     }
3893 
3894     if (!drv->bdrv_amend_options) {
3895         error_report("Format driver '%s' does not support option amendment",
3896                      format);
3897         return 1;
3898     }
3899 
3900     /* Every driver supporting amendment must have create_opts */
3901     assert(drv->create_opts);
3902 
3903     printf("Creation options for '%s':\n", format);
3904     qemu_opts_print_help(drv->create_opts, false);
3905     printf("\nNote that not all of these options may be amendable.\n");
3906     return 0;
3907 }
3908 
3909 static int img_amend(int argc, char **argv)
3910 {
3911     Error *err = NULL;
3912     int c, ret = 0;
3913     char *options = NULL;
3914     QemuOptsList *create_opts = NULL;
3915     QemuOpts *opts = NULL;
3916     const char *fmt = NULL, *filename, *cache;
3917     int flags;
3918     bool writethrough;
3919     bool quiet = false, progress = false;
3920     BlockBackend *blk = NULL;
3921     BlockDriverState *bs = NULL;
3922     bool image_opts = false;
3923 
3924     cache = BDRV_DEFAULT_CACHE;
3925     for (;;) {
3926         static const struct option long_options[] = {
3927             {"help", no_argument, 0, 'h'},
3928             {"object", required_argument, 0, OPTION_OBJECT},
3929             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3930             {0, 0, 0, 0}
3931         };
3932         c = getopt_long(argc, argv, ":ho:f:t:pq",
3933                         long_options, NULL);
3934         if (c == -1) {
3935             break;
3936         }
3937 
3938         switch (c) {
3939         case ':':
3940             missing_argument(argv[optind - 1]);
3941             break;
3942         case '?':
3943             unrecognized_option(argv[optind - 1]);
3944             break;
3945         case 'h':
3946             help();
3947             break;
3948         case 'o':
3949             if (!is_valid_option_list(optarg)) {
3950                 error_report("Invalid option list: %s", optarg);
3951                 ret = -1;
3952                 goto out_no_progress;
3953             }
3954             if (!options) {
3955                 options = g_strdup(optarg);
3956             } else {
3957                 char *old_options = options;
3958                 options = g_strdup_printf("%s,%s", options, optarg);
3959                 g_free(old_options);
3960             }
3961             break;
3962         case 'f':
3963             fmt = optarg;
3964             break;
3965         case 't':
3966             cache = optarg;
3967             break;
3968         case 'p':
3969             progress = true;
3970             break;
3971         case 'q':
3972             quiet = true;
3973             break;
3974         case OPTION_OBJECT:
3975             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3976                                            optarg, true);
3977             if (!opts) {
3978                 ret = -1;
3979                 goto out_no_progress;
3980             }
3981             break;
3982         case OPTION_IMAGE_OPTS:
3983             image_opts = true;
3984             break;
3985         }
3986     }
3987 
3988     if (!options) {
3989         error_exit("Must specify options (-o)");
3990     }
3991 
3992     if (qemu_opts_foreach(&qemu_object_opts,
3993                           user_creatable_add_opts_foreach,
3994                           qemu_img_object_print_help, &error_fatal)) {
3995         ret = -1;
3996         goto out_no_progress;
3997     }
3998 
3999     if (quiet) {
4000         progress = false;
4001     }
4002     qemu_progress_init(progress, 1.0);
4003 
4004     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4005     if (fmt && has_help_option(options)) {
4006         /* If a format is explicitly specified (and possibly no filename is
4007          * given), print option help here */
4008         ret = print_amend_option_help(fmt);
4009         goto out;
4010     }
4011 
4012     if (optind != argc - 1) {
4013         error_report("Expecting one image file name");
4014         ret = -1;
4015         goto out;
4016     }
4017 
4018     flags = BDRV_O_RDWR;
4019     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4020     if (ret < 0) {
4021         error_report("Invalid cache option: %s", cache);
4022         goto out;
4023     }
4024 
4025     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4026                    false);
4027     if (!blk) {
4028         ret = -1;
4029         goto out;
4030     }
4031     bs = blk_bs(blk);
4032 
4033     fmt = bs->drv->format_name;
4034 
4035     if (has_help_option(options)) {
4036         /* If the format was auto-detected, print option help here */
4037         ret = print_amend_option_help(fmt);
4038         goto out;
4039     }
4040 
4041     if (!bs->drv->bdrv_amend_options) {
4042         error_report("Format driver '%s' does not support option amendment",
4043                      fmt);
4044         ret = -1;
4045         goto out;
4046     }
4047 
4048     /* Every driver supporting amendment must have create_opts */
4049     assert(bs->drv->create_opts);
4050 
4051     create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
4052     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4053     qemu_opts_do_parse(opts, options, NULL, &err);
4054     if (err) {
4055         error_report_err(err);
4056         ret = -1;
4057         goto out;
4058     }
4059 
4060     /* In case the driver does not call amend_status_cb() */
4061     qemu_progress_print(0.f, 0);
4062     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, &err);
4063     qemu_progress_print(100.f, 0);
4064     if (ret < 0) {
4065         error_report_err(err);
4066         goto out;
4067     }
4068 
4069 out:
4070     qemu_progress_end();
4071 
4072 out_no_progress:
4073     blk_unref(blk);
4074     qemu_opts_del(opts);
4075     qemu_opts_free(create_opts);
4076     g_free(options);
4077 
4078     if (ret) {
4079         return 1;
4080     }
4081     return 0;
4082 }
4083 
4084 typedef struct BenchData {
4085     BlockBackend *blk;
4086     uint64_t image_size;
4087     bool write;
4088     int bufsize;
4089     int step;
4090     int nrreq;
4091     int n;
4092     int flush_interval;
4093     bool drain_on_flush;
4094     uint8_t *buf;
4095     QEMUIOVector *qiov;
4096 
4097     int in_flight;
4098     bool in_flush;
4099     uint64_t offset;
4100 } BenchData;
4101 
/*
 * Completion callback for flushes issued without draining the queue first.
 * A successful flush needs no action; a failed one ends the program.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4109 
4110 static void bench_cb(void *opaque, int ret)
4111 {
4112     BenchData *b = opaque;
4113     BlockAIOCB *acb;
4114 
4115     if (ret < 0) {
4116         error_report("Failed request: %s", strerror(-ret));
4117         exit(EXIT_FAILURE);
4118     }
4119 
4120     if (b->in_flush) {
4121         /* Just finished a flush with drained queue: Start next requests */
4122         assert(b->in_flight == 0);
4123         b->in_flush = false;
4124     } else if (b->in_flight > 0) {
4125         int remaining = b->n - b->in_flight;
4126 
4127         b->n--;
4128         b->in_flight--;
4129 
4130         /* Time for flush? Drain queue if requested, then flush */
4131         if (b->flush_interval && remaining % b->flush_interval == 0) {
4132             if (!b->in_flight || !b->drain_on_flush) {
4133                 BlockCompletionFunc *cb;
4134 
4135                 if (b->drain_on_flush) {
4136                     b->in_flush = true;
4137                     cb = bench_cb;
4138                 } else {
4139                     cb = bench_undrained_flush_cb;
4140                 }
4141 
4142                 acb = blk_aio_flush(b->blk, cb, b);
4143                 if (!acb) {
4144                     error_report("Failed to issue flush request");
4145                     exit(EXIT_FAILURE);
4146                 }
4147             }
4148             if (b->drain_on_flush) {
4149                 return;
4150             }
4151         }
4152     }
4153 
4154     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4155         int64_t offset = b->offset;
4156         /* blk_aio_* might look for completed I/Os and kick bench_cb
4157          * again, so make sure this operation is counted by in_flight
4158          * and b->offset is ready for the next submission.
4159          */
4160         b->in_flight++;
4161         b->offset += b->step;
4162         b->offset %= b->image_size;
4163         if (b->write) {
4164             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4165         } else {
4166             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4167         }
4168         if (!acb) {
4169             error_report("Failed to issue request");
4170             exit(EXIT_FAILURE);
4171         }
4172     }
4173 }
4174 
4175 static int img_bench(int argc, char **argv)
4176 {
4177     int c, ret = 0;
4178     const char *fmt = NULL, *filename;
4179     bool quiet = false;
4180     bool image_opts = false;
4181     bool is_write = false;
4182     int count = 75000;
4183     int depth = 64;
4184     int64_t offset = 0;
4185     size_t bufsize = 4096;
4186     int pattern = 0;
4187     size_t step = 0;
4188     int flush_interval = 0;
4189     bool drain_on_flush = true;
4190     int64_t image_size;
4191     BlockBackend *blk = NULL;
4192     BenchData data = {};
4193     int flags = 0;
4194     bool writethrough = false;
4195     struct timeval t1, t2;
4196     int i;
4197     bool force_share = false;
4198     size_t buf_size;
4199 
4200     for (;;) {
4201         static const struct option long_options[] = {
4202             {"help", no_argument, 0, 'h'},
4203             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4204             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4205             {"pattern", required_argument, 0, OPTION_PATTERN},
4206             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4207             {"force-share", no_argument, 0, 'U'},
4208             {0, 0, 0, 0}
4209         };
4210         c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
4211                         NULL);
4212         if (c == -1) {
4213             break;
4214         }
4215 
4216         switch (c) {
4217         case ':':
4218             missing_argument(argv[optind - 1]);
4219             break;
4220         case '?':
4221             unrecognized_option(argv[optind - 1]);
4222             break;
4223         case 'h':
4224             help();
4225             break;
4226         case 'c':
4227         {
4228             unsigned long res;
4229 
4230             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4231                 error_report("Invalid request count specified");
4232                 return 1;
4233             }
4234             count = res;
4235             break;
4236         }
4237         case 'd':
4238         {
4239             unsigned long res;
4240 
4241             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4242                 error_report("Invalid queue depth specified");
4243                 return 1;
4244             }
4245             depth = res;
4246             break;
4247         }
4248         case 'f':
4249             fmt = optarg;
4250             break;
4251         case 'n':
4252             flags |= BDRV_O_NATIVE_AIO;
4253             break;
4254         case 'i':
4255             ret = bdrv_parse_aio(optarg, &flags);
4256             if (ret < 0) {
4257                 error_report("Invalid aio option: %s", optarg);
4258                 ret = -1;
4259                 goto out;
4260             }
4261             break;
4262         case 'o':
4263         {
4264             offset = cvtnum(optarg);
4265             if (offset < 0) {
4266                 error_report("Invalid offset specified");
4267                 return 1;
4268             }
4269             break;
4270         }
4271             break;
4272         case 'q':
4273             quiet = true;
4274             break;
4275         case 's':
4276         {
4277             int64_t sval;
4278 
4279             sval = cvtnum(optarg);
4280             if (sval < 0 || sval > INT_MAX) {
4281                 error_report("Invalid buffer size specified");
4282                 return 1;
4283             }
4284 
4285             bufsize = sval;
4286             break;
4287         }
4288         case 'S':
4289         {
4290             int64_t sval;
4291 
4292             sval = cvtnum(optarg);
4293             if (sval < 0 || sval > INT_MAX) {
4294                 error_report("Invalid step size specified");
4295                 return 1;
4296             }
4297 
4298             step = sval;
4299             break;
4300         }
4301         case 't':
4302             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4303             if (ret < 0) {
4304                 error_report("Invalid cache mode");
4305                 ret = -1;
4306                 goto out;
4307             }
4308             break;
4309         case 'w':
4310             flags |= BDRV_O_RDWR;
4311             is_write = true;
4312             break;
4313         case 'U':
4314             force_share = true;
4315             break;
4316         case OPTION_PATTERN:
4317         {
4318             unsigned long res;
4319 
4320             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4321                 error_report("Invalid pattern byte specified");
4322                 return 1;
4323             }
4324             pattern = res;
4325             break;
4326         }
4327         case OPTION_FLUSH_INTERVAL:
4328         {
4329             unsigned long res;
4330 
4331             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4332                 error_report("Invalid flush interval specified");
4333                 return 1;
4334             }
4335             flush_interval = res;
4336             break;
4337         }
4338         case OPTION_NO_DRAIN:
4339             drain_on_flush = false;
4340             break;
4341         case OPTION_IMAGE_OPTS:
4342             image_opts = true;
4343             break;
4344         }
4345     }
4346 
4347     if (optind != argc - 1) {
4348         error_exit("Expecting one image file name");
4349     }
4350     filename = argv[argc - 1];
4351 
4352     if (!is_write && flush_interval) {
4353         error_report("--flush-interval is only available in write tests");
4354         ret = -1;
4355         goto out;
4356     }
4357     if (flush_interval && flush_interval < depth) {
4358         error_report("Flush interval can't be smaller than depth");
4359         ret = -1;
4360         goto out;
4361     }
4362 
4363     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4364                    force_share);
4365     if (!blk) {
4366         ret = -1;
4367         goto out;
4368     }
4369 
4370     image_size = blk_getlength(blk);
4371     if (image_size < 0) {
4372         ret = image_size;
4373         goto out;
4374     }
4375 
4376     data = (BenchData) {
4377         .blk            = blk,
4378         .image_size     = image_size,
4379         .bufsize        = bufsize,
4380         .step           = step ?: bufsize,
4381         .nrreq          = depth,
4382         .n              = count,
4383         .offset         = offset,
4384         .write          = is_write,
4385         .flush_interval = flush_interval,
4386         .drain_on_flush = drain_on_flush,
4387     };
4388     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4389            "(starting at offset %" PRId64 ", step size %d)\n",
4390            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4391            data.offset, data.step);
4392     if (flush_interval) {
4393         printf("Sending flush every %d requests\n", flush_interval);
4394     }
4395 
4396     buf_size = data.nrreq * data.bufsize;
4397     data.buf = blk_blockalign(blk, buf_size);
4398     memset(data.buf, pattern, data.nrreq * data.bufsize);
4399 
4400     blk_register_buf(blk, data.buf, buf_size);
4401 
4402     data.qiov = g_new(QEMUIOVector, data.nrreq);
4403     for (i = 0; i < data.nrreq; i++) {
4404         qemu_iovec_init(&data.qiov[i], 1);
4405         qemu_iovec_add(&data.qiov[i],
4406                        data.buf + i * data.bufsize, data.bufsize);
4407     }
4408 
4409     gettimeofday(&t1, NULL);
4410     bench_cb(&data, 0);
4411 
4412     while (data.n > 0) {
4413         main_loop_wait(false);
4414     }
4415     gettimeofday(&t2, NULL);
4416 
4417     printf("Run completed in %3.3f seconds.\n",
4418            (t2.tv_sec - t1.tv_sec)
4419            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4420 
4421 out:
4422     if (data.buf) {
4423         blk_unregister_buf(blk, data.buf);
4424     }
4425     qemu_vfree(data.buf);
4426     blk_unref(blk);
4427 
4428     if (ret) {
4429         return 1;
4430     }
4431     return 0;
4432 }
4433 
/* One bit per dd operand, recorded in DdInfo.flags once it has been parsed */
#define C_BS      (1 << 0)
#define C_COUNT   (1 << 1)
#define C_IF      (1 << 2)
#define C_OF      (1 << 3)
#define C_SKIP    (1 << 4)
4439 
/* Settings of a dd run that are not tied to the input or the output side */
struct DdInfo {
    /* Bitmask of C_* values, one bit per operand seen on the command line */
    unsigned int flags;
    /* Value of the count= operand */
    int64_t count;
};
4444 
/* Description of one side (input or output) of a dd run */
struct DdIo {
    /* Block size */
    int bsz;
    /* Copy of the if=/of= operand value */
    char *filename;
    uint8_t *buf;
    /* For the input side: value of the skip= operand */
    int64_t offset;
};
4451 
/* One row of the table mapping dd operand names to their handlers */
struct DdOpts {
    /* Operand keyword, i.e. the part before '=' */
    const char *name;
    /*
     * Handler, called with the text after '=' plus the input, output and
     * general dd state; a non-zero return value signals an error.
     */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    /* C_* bit recorded in DdInfo.flags after the handler succeeded */
    unsigned int flag;
};
4457 
4458 static int img_dd_bs(const char *arg,
4459                      struct DdIo *in, struct DdIo *out,
4460                      struct DdInfo *dd)
4461 {
4462     int64_t res;
4463 
4464     res = cvtnum(arg);
4465 
4466     if (res <= 0 || res > INT_MAX) {
4467         error_report("invalid number: '%s'", arg);
4468         return 1;
4469     }
4470     in->bsz = out->bsz = res;
4471 
4472     return 0;
4473 }
4474 
4475 static int img_dd_count(const char *arg,
4476                         struct DdIo *in, struct DdIo *out,
4477                         struct DdInfo *dd)
4478 {
4479     dd->count = cvtnum(arg);
4480 
4481     if (dd->count < 0) {
4482         error_report("invalid number: '%s'", arg);
4483         return 1;
4484     }
4485 
4486     return 0;
4487 }
4488 
4489 static int img_dd_if(const char *arg,
4490                      struct DdIo *in, struct DdIo *out,
4491                      struct DdInfo *dd)
4492 {
4493     in->filename = g_strdup(arg);
4494 
4495     return 0;
4496 }
4497 
4498 static int img_dd_of(const char *arg,
4499                      struct DdIo *in, struct DdIo *out,
4500                      struct DdInfo *dd)
4501 {
4502     out->filename = g_strdup(arg);
4503 
4504     return 0;
4505 }
4506 
4507 static int img_dd_skip(const char *arg,
4508                        struct DdIo *in, struct DdIo *out,
4509                        struct DdInfo *dd)
4510 {
4511     in->offset = cvtnum(arg);
4512 
4513     if (in->offset < 0) {
4514         error_report("invalid number: '%s'", arg);
4515         return 1;
4516     }
4517 
4518     return 0;
4519 }
4520 
4521 static int img_dd(int argc, char **argv)
4522 {
4523     int ret = 0;
4524     char *arg = NULL;
4525     char *tmp;
4526     BlockDriver *drv = NULL, *proto_drv = NULL;
4527     BlockBackend *blk1 = NULL, *blk2 = NULL;
4528     QemuOpts *opts = NULL;
4529     QemuOptsList *create_opts = NULL;
4530     Error *local_err = NULL;
4531     bool image_opts = false;
4532     int c, i;
4533     const char *out_fmt = "raw";
4534     const char *fmt = NULL;
4535     int64_t size = 0;
4536     int64_t block_count = 0, out_pos, in_pos;
4537     bool force_share = false;
4538     struct DdInfo dd = {
4539         .flags = 0,
4540         .count = 0,
4541     };
4542     struct DdIo in = {
4543         .bsz = 512, /* Block size is by default 512 bytes */
4544         .filename = NULL,
4545         .buf = NULL,
4546         .offset = 0
4547     };
4548     struct DdIo out = {
4549         .bsz = 512,
4550         .filename = NULL,
4551         .buf = NULL,
4552         .offset = 0
4553     };
4554 
4555     const struct DdOpts options[] = {
4556         { "bs", img_dd_bs, C_BS },
4557         { "count", img_dd_count, C_COUNT },
4558         { "if", img_dd_if, C_IF },
4559         { "of", img_dd_of, C_OF },
4560         { "skip", img_dd_skip, C_SKIP },
4561         { NULL, NULL, 0 }
4562     };
4563     const struct option long_options[] = {
4564         { "help", no_argument, 0, 'h'},
4565         { "object", required_argument, 0, OPTION_OBJECT},
4566         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4567         { "force-share", no_argument, 0, 'U'},
4568         { 0, 0, 0, 0 }
4569     };
4570 
4571     while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
4572         if (c == EOF) {
4573             break;
4574         }
4575         switch (c) {
4576         case 'O':
4577             out_fmt = optarg;
4578             break;
4579         case 'f':
4580             fmt = optarg;
4581             break;
4582         case ':':
4583             missing_argument(argv[optind - 1]);
4584             break;
4585         case '?':
4586             unrecognized_option(argv[optind - 1]);
4587             break;
4588         case 'h':
4589             help();
4590             break;
4591         case 'U':
4592             force_share = true;
4593             break;
4594         case OPTION_OBJECT:
4595             if (!qemu_opts_parse_noisily(&qemu_object_opts, optarg, true)) {
4596                 ret = -1;
4597                 goto out;
4598             }
4599             break;
4600         case OPTION_IMAGE_OPTS:
4601             image_opts = true;
4602             break;
4603         }
4604     }
4605 
4606     for (i = optind; i < argc; i++) {
4607         int j;
4608         arg = g_strdup(argv[i]);
4609 
4610         tmp = strchr(arg, '=');
4611         if (tmp == NULL) {
4612             error_report("unrecognized operand %s", arg);
4613             ret = -1;
4614             goto out;
4615         }
4616 
4617         *tmp++ = '\0';
4618 
4619         for (j = 0; options[j].name != NULL; j++) {
4620             if (!strcmp(arg, options[j].name)) {
4621                 break;
4622             }
4623         }
4624         if (options[j].name == NULL) {
4625             error_report("unrecognized operand %s", arg);
4626             ret = -1;
4627             goto out;
4628         }
4629 
4630         if (options[j].f(tmp, &in, &out, &dd) != 0) {
4631             ret = -1;
4632             goto out;
4633         }
4634         dd.flags |= options[j].flag;
4635         g_free(arg);
4636         arg = NULL;
4637     }
4638 
4639     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
4640         error_report("Must specify both input and output files");
4641         ret = -1;
4642         goto out;
4643     }
4644 
4645     if (qemu_opts_foreach(&qemu_object_opts,
4646                           user_creatable_add_opts_foreach,
4647                           qemu_img_object_print_help, &error_fatal)) {
4648         ret = -1;
4649         goto out;
4650     }
4651 
4652     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
4653                     force_share);
4654 
4655     if (!blk1) {
4656         ret = -1;
4657         goto out;
4658     }
4659 
4660     drv = bdrv_find_format(out_fmt);
4661     if (!drv) {
4662         error_report("Unknown file format");
4663         ret = -1;
4664         goto out;
4665     }
4666     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
4667 
4668     if (!proto_drv) {
4669         error_report_err(local_err);
4670         ret = -1;
4671         goto out;
4672     }
4673     if (!drv->create_opts) {
4674         error_report("Format driver '%s' does not support image creation",
4675                      drv->format_name);
4676         ret = -1;
4677         goto out;
4678     }
4679     if (!proto_drv->create_opts) {
4680         error_report("Protocol driver '%s' does not support image creation",
4681                      proto_drv->format_name);
4682         ret = -1;
4683         goto out;
4684     }
4685     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4686     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
4687 
4688     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4689 
4690     size = blk_getlength(blk1);
4691     if (size < 0) {
4692         error_report("Failed to get size for '%s'", in.filename);
4693         ret = -1;
4694         goto out;
4695     }
4696 
4697     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
4698         dd.count * in.bsz < size) {
4699         size = dd.count * in.bsz;
4700     }
4701 
4702     /* Overflow means the specified offset is beyond input image's size */
4703     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4704                               size < in.bsz * in.offset)) {
4705         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
4706     } else {
4707         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
4708                             size - in.bsz * in.offset, &error_abort);
4709     }
4710 
4711     ret = bdrv_create(drv, out.filename, opts, &local_err);
4712     if (ret < 0) {
4713         error_reportf_err(local_err,
4714                           "%s: error while creating output image: ",
4715                           out.filename);
4716         ret = -1;
4717         goto out;
4718     }
4719 
4720     /* TODO, we can't honour --image-opts for the target,
4721      * since it needs to be given in a format compatible
4722      * with the bdrv_create() call above which does not
4723      * support image-opts style.
4724      */
4725     blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
4726                          false, false, false);
4727 
4728     if (!blk2) {
4729         ret = -1;
4730         goto out;
4731     }
4732 
4733     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4734                               size < in.offset * in.bsz)) {
4735         /* We give a warning if the skip option is bigger than the input
4736          * size and create an empty output disk image (i.e. like dd(1)).
4737          */
4738         error_report("%s: cannot skip to specified offset", in.filename);
4739         in_pos = size;
4740     } else {
4741         in_pos = in.offset * in.bsz;
4742     }
4743 
4744     in.buf = g_new(uint8_t, in.bsz);
4745 
4746     for (out_pos = 0; in_pos < size; block_count++) {
4747         int in_ret, out_ret;
4748 
4749         if (in_pos + in.bsz > size) {
4750             in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
4751         } else {
4752             in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
4753         }
4754         if (in_ret < 0) {
4755             error_report("error while reading from input image file: %s",
4756                          strerror(-in_ret));
4757             ret = -1;
4758             goto out;
4759         }
4760         in_pos += in_ret;
4761 
4762         out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
4763 
4764         if (out_ret < 0) {
4765             error_report("error while writing to output image file: %s",
4766                          strerror(-out_ret));
4767             ret = -1;
4768             goto out;
4769         }
4770         out_pos += out_ret;
4771     }
4772 
4773 out:
4774     g_free(arg);
4775     qemu_opts_del(opts);
4776     qemu_opts_free(create_opts);
4777     blk_unref(blk1);
4778     blk_unref(blk2);
4779     g_free(in.filename);
4780     g_free(out.filename);
4781     g_free(in.buf);
4782     g_free(out.buf);
4783 
4784     if (ret) {
4785         return 1;
4786     }
4787     return 0;
4788 }
4789 
4790 static void dump_json_block_measure_info(BlockMeasureInfo *info)
4791 {
4792     QString *str;
4793     QObject *obj;
4794     Visitor *v = qobject_output_visitor_new(&obj);
4795 
4796     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
4797     visit_complete(v, &obj);
4798     str = qobject_to_json_pretty(obj);
4799     assert(str != NULL);
4800     printf("%s\n", qstring_get_str(str));
4801     qobject_unref(obj);
4802     visit_free(v);
4803     qobject_unref(str);
4804 }
4805 
4806 static int img_measure(int argc, char **argv)
4807 {
4808     static const struct option long_options[] = {
4809         {"help", no_argument, 0, 'h'},
4810         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4811         {"object", required_argument, 0, OPTION_OBJECT},
4812         {"output", required_argument, 0, OPTION_OUTPUT},
4813         {"size", required_argument, 0, OPTION_SIZE},
4814         {"force-share", no_argument, 0, 'U'},
4815         {0, 0, 0, 0}
4816     };
4817     OutputFormat output_format = OFORMAT_HUMAN;
4818     BlockBackend *in_blk = NULL;
4819     BlockDriver *drv;
4820     const char *filename = NULL;
4821     const char *fmt = NULL;
4822     const char *out_fmt = "raw";
4823     char *options = NULL;
4824     char *snapshot_name = NULL;
4825     bool force_share = false;
4826     QemuOpts *opts = NULL;
4827     QemuOpts *object_opts = NULL;
4828     QemuOpts *sn_opts = NULL;
4829     QemuOptsList *create_opts = NULL;
4830     bool image_opts = false;
4831     uint64_t img_size = UINT64_MAX;
4832     BlockMeasureInfo *info = NULL;
4833     Error *local_err = NULL;
4834     int ret = 1;
4835     int c;
4836 
4837     while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
4838                             long_options, NULL)) != -1) {
4839         switch (c) {
4840         case '?':
4841         case 'h':
4842             help();
4843             break;
4844         case 'f':
4845             fmt = optarg;
4846             break;
4847         case 'O':
4848             out_fmt = optarg;
4849             break;
4850         case 'o':
4851             if (!is_valid_option_list(optarg)) {
4852                 error_report("Invalid option list: %s", optarg);
4853                 goto out;
4854             }
4855             if (!options) {
4856                 options = g_strdup(optarg);
4857             } else {
4858                 char *old_options = options;
4859                 options = g_strdup_printf("%s,%s", options, optarg);
4860                 g_free(old_options);
4861             }
4862             break;
4863         case 'l':
4864             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
4865                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
4866                                                   optarg, false);
4867                 if (!sn_opts) {
4868                     error_report("Failed in parsing snapshot param '%s'",
4869                                  optarg);
4870                     goto out;
4871                 }
4872             } else {
4873                 snapshot_name = optarg;
4874             }
4875             break;
4876         case 'U':
4877             force_share = true;
4878             break;
4879         case OPTION_OBJECT:
4880             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
4881                                                   optarg, true);
4882             if (!object_opts) {
4883                 goto out;
4884             }
4885             break;
4886         case OPTION_IMAGE_OPTS:
4887             image_opts = true;
4888             break;
4889         case OPTION_OUTPUT:
4890             if (!strcmp(optarg, "json")) {
4891                 output_format = OFORMAT_JSON;
4892             } else if (!strcmp(optarg, "human")) {
4893                 output_format = OFORMAT_HUMAN;
4894             } else {
4895                 error_report("--output must be used with human or json "
4896                              "as argument.");
4897                 goto out;
4898             }
4899             break;
4900         case OPTION_SIZE:
4901         {
4902             int64_t sval;
4903 
4904             sval = cvtnum(optarg);
4905             if (sval < 0) {
4906                 if (sval == -ERANGE) {
4907                     error_report("Image size must be less than 8 EiB!");
4908                 } else {
4909                     error_report("Invalid image size specified! You may use "
4910                                  "k, M, G, T, P or E suffixes for ");
4911                     error_report("kilobytes, megabytes, gigabytes, terabytes, "
4912                                  "petabytes and exabytes.");
4913                 }
4914                 goto out;
4915             }
4916             img_size = (uint64_t)sval;
4917         }
4918         break;
4919         }
4920     }
4921 
4922     if (qemu_opts_foreach(&qemu_object_opts,
4923                           user_creatable_add_opts_foreach,
4924                           qemu_img_object_print_help, &error_fatal)) {
4925         goto out;
4926     }
4927 
4928     if (argc - optind > 1) {
4929         error_report("At most one filename argument is allowed.");
4930         goto out;
4931     } else if (argc - optind == 1) {
4932         filename = argv[optind];
4933     }
4934 
4935     if (!filename &&
4936         (object_opts || image_opts || fmt || snapshot_name || sn_opts)) {
4937         error_report("--object, --image-opts, -f, and -l "
4938                      "require a filename argument.");
4939         goto out;
4940     }
4941     if (filename && img_size != UINT64_MAX) {
4942         error_report("--size N cannot be used together with a filename.");
4943         goto out;
4944     }
4945     if (!filename && img_size == UINT64_MAX) {
4946         error_report("Either --size N or one filename must be specified.");
4947         goto out;
4948     }
4949 
4950     if (filename) {
4951         in_blk = img_open(image_opts, filename, fmt, 0,
4952                           false, false, force_share);
4953         if (!in_blk) {
4954             goto out;
4955         }
4956 
4957         if (sn_opts) {
4958             bdrv_snapshot_load_tmp(blk_bs(in_blk),
4959                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
4960                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
4961                     &local_err);
4962         } else if (snapshot_name != NULL) {
4963             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
4964                     snapshot_name, &local_err);
4965         }
4966         if (local_err) {
4967             error_reportf_err(local_err, "Failed to load snapshot: ");
4968             goto out;
4969         }
4970     }
4971 
4972     drv = bdrv_find_format(out_fmt);
4973     if (!drv) {
4974         error_report("Unknown file format '%s'", out_fmt);
4975         goto out;
4976     }
4977     if (!drv->create_opts) {
4978         error_report("Format driver '%s' does not support image creation",
4979                      drv->format_name);
4980         goto out;
4981     }
4982 
4983     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4984     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
4985     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4986     if (options) {
4987         qemu_opts_do_parse(opts, options, NULL, &local_err);
4988         if (local_err) {
4989             error_report_err(local_err);
4990             error_report("Invalid options for file format '%s'", out_fmt);
4991             goto out;
4992         }
4993     }
4994     if (img_size != UINT64_MAX) {
4995         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
4996     }
4997 
4998     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
4999     if (local_err) {
5000         error_report_err(local_err);
5001         goto out;
5002     }
5003 
5004     if (output_format == OFORMAT_HUMAN) {
5005         printf("required size: %" PRIu64 "\n", info->required);
5006         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5007     } else {
5008         dump_json_block_measure_info(info);
5009     }
5010 
5011     ret = 0;
5012 
5013 out:
5014     qapi_free_BlockMeasureInfo(info);
5015     qemu_opts_del(object_opts);
5016     qemu_opts_del(opts);
5017     qemu_opts_del(sn_opts);
5018     qemu_opts_free(create_opts);
5019     g_free(options);
5020     blk_unref(in_blk);
5021     return ret;
5022 }
5023 
5024 static const img_cmd_t img_cmds[] = {
5025 #define DEF(option, callback, arg_string)        \
5026     { option, callback },
5027 #include "qemu-img-cmds.h"
5028 #undef DEF
5029     { NULL, NULL, },
5030 };
5031 
5032 int main(int argc, char **argv)
5033 {
5034     const img_cmd_t *cmd;
5035     const char *cmdname;
5036     Error *local_error = NULL;
5037     char *trace_file = NULL;
5038     int c;
5039     static const struct option long_options[] = {
5040         {"help", no_argument, 0, 'h'},
5041         {"version", no_argument, 0, 'V'},
5042         {"trace", required_argument, NULL, 'T'},
5043         {0, 0, 0, 0}
5044     };
5045 
5046 #ifdef CONFIG_POSIX
5047     signal(SIGPIPE, SIG_IGN);
5048 #endif
5049 
5050     error_init(argv[0]);
5051     module_call_init(MODULE_INIT_TRACE);
5052     qemu_init_exec_dir(argv[0]);
5053 
5054     if (qemu_init_main_loop(&local_error)) {
5055         error_report_err(local_error);
5056         exit(EXIT_FAILURE);
5057     }
5058 
5059     qcrypto_init(&error_fatal);
5060 
5061     module_call_init(MODULE_INIT_QOM);
5062     bdrv_init();
5063     if (argc < 2) {
5064         error_exit("Not enough arguments");
5065     }
5066 
5067     qemu_add_opts(&qemu_object_opts);
5068     qemu_add_opts(&qemu_source_opts);
5069     qemu_add_opts(&qemu_trace_opts);
5070 
5071     while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5072         switch (c) {
5073         case ':':
5074             missing_argument(argv[optind - 1]);
5075             return 0;
5076         case '?':
5077             unrecognized_option(argv[optind - 1]);
5078             return 0;
5079         case 'h':
5080             help();
5081             return 0;
5082         case 'V':
5083             printf(QEMU_IMG_VERSION);
5084             return 0;
5085         case 'T':
5086             g_free(trace_file);
5087             trace_file = trace_opt_parse(optarg);
5088             break;
5089         }
5090     }
5091 
5092     cmdname = argv[optind];
5093 
5094     /* reset getopt_long scanning */
5095     argc -= optind;
5096     if (argc < 1) {
5097         return 0;
5098     }
5099     argv += optind;
5100     qemu_reset_optind();
5101 
5102     if (!trace_init_backends()) {
5103         exit(1);
5104     }
5105     trace_init_file(trace_file);
5106     qemu_set_log(LOG_TRACE);
5107 
5108     /* find the command */
5109     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5110         if (!strcmp(cmdname, cmd->name)) {
5111             return cmd->handler(argc, argv);
5112         }
5113     }
5114 
5115     /* not found */
5116     error_exit("Command not found: %s", cmdname);
5117 }
5118