xref: /openbmc/qemu/qemu-img.c (revision 4b9fa0b4)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu-common.h"
29 #include "qemu-version.h"
30 #include "qapi/error.h"
31 #include "qapi/qapi-visit-block-core.h"
32 #include "qapi/qobject-output-visitor.h"
33 #include "qapi/qmp/qjson.h"
34 #include "qapi/qmp/qdict.h"
35 #include "qapi/qmp/qstring.h"
36 #include "qemu/cutils.h"
37 #include "qemu/config-file.h"
38 #include "qemu/option.h"
39 #include "qemu/error-report.h"
40 #include "qemu/log.h"
41 #include "qemu/main-loop.h"
42 #include "qemu/module.h"
43 #include "qemu/units.h"
44 #include "qom/object_interfaces.h"
45 #include "sysemu/block-backend.h"
46 #include "block/block_int.h"
47 #include "block/blockjob.h"
48 #include "block/qapi.h"
49 #include "crypto/init.h"
50 #include "trace/control.h"
51 
/* Version and copyright banner; it forms the first part of the help text. */
#define QEMU_IMG_VERSION \
    "qemu-img version " QEMU_FULL_VERSION "\n" \
    QEMU_COPYRIGHT "\n"
54 
/* Pairs a command name with a handler function taking (argc, argv). */
typedef struct img_cmd_t img_cmd_t;

struct img_cmd_t {
    const char *name;                       /* command name */
    int (*handler)(int argc, char **argv);  /* returns the command's status */
};
59 
/*
 * Identifiers used as the 'val' member of getopt_long() long options.
 * Numbering begins at 256, presumably so that it cannot clash with the
 * single-character option codes; each following entry is one larger.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN,       /* 257 */
    OPTION_OBJECT,              /* 258 */
    OPTION_IMAGE_OPTS,          /* 259 */
    OPTION_PATTERN,             /* 260 */
    OPTION_FLUSH_INTERVAL,      /* 261 */
    OPTION_NO_DRAIN,            /* 262 */
    OPTION_TARGET_IMAGE_OPTS,   /* 263 */
    OPTION_SIZE,                /* 264 */
    OPTION_PREALLOCATION,       /* 265 */
    OPTION_SHRINK,              /* 266 */
    OPTION_SALVAGE,             /* 267 */
};
74 
/* Output style selected with --output ("json" or "human"). */
typedef enum OutputFormat {
    OFORMAT_JSON  = 0,
    OFORMAT_HUMAN = 1,
} OutputFormat;
79 
/*
 * Data integrity is not important for qemu-img, so the default cache mode
 * is cache=writeback.
 */
#define BDRV_DEFAULT_CACHE "writeback"
82 
/*
 * Callback passed to bdrv_iterate_format(): writes @name to stdout,
 * preceded by a single space.  @opaque is not used.
 */
static void format_print(void *opaque, const char *name)
{
    fprintf(stdout, " %s", name);
}
87 
88 static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
89 {
90     va_list ap;
91 
92     va_start(ap, fmt);
93     error_vreport(fmt, ap);
94     va_end(ap);
95 
96     error_printf("Try 'qemu-img --help' for more information\n");
97     exit(EXIT_FAILURE);
98 }
99 
100 static void QEMU_NORETURN missing_argument(const char *option)
101 {
102     error_exit("missing argument for option '%s'", option);
103 }
104 
105 static void QEMU_NORETURN unrecognized_option(const char *option)
106 {
107     error_exit("unrecognized option '%s'", option);
108 }
109 
110 /* Please keep in synch with qemu-img.texi */
111 static void QEMU_NORETURN help(void)
112 {
113     const char *help_msg =
114            QEMU_IMG_VERSION
115            "usage: qemu-img [standard options] command [command options]\n"
116            "QEMU disk image utility\n"
117            "\n"
118            "    '-h', '--help'       display this help and exit\n"
119            "    '-V', '--version'    output version information and exit\n"
120            "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
121            "                         specify tracing options\n"
122            "\n"
123            "Command syntax:\n"
124 #define DEF(option, callback, arg_string)        \
125            "  " arg_string "\n"
126 #include "qemu-img-cmds.h"
127 #undef DEF
128            "\n"
129            "Command parameters:\n"
130            "  'filename' is a disk image filename\n"
131            "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
132            "    manual page for a description of the object properties. The most common\n"
133            "    object type is a 'secret', which is used to supply passwords and/or\n"
134            "    encryption keys.\n"
135            "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
136            "  'cache' is the cache mode used to write the output disk image, the valid\n"
137            "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
138            "    'directsync' and 'unsafe' (default for convert)\n"
139            "  'src_cache' is the cache mode used to read input disk images, the valid\n"
140            "    options are the same as for the 'cache' option\n"
141            "  'size' is the disk image size in bytes. Optional suffixes\n"
142            "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
143            "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
144            "    supported. 'b' is ignored.\n"
145            "  'output_filename' is the destination disk image filename\n"
146            "  'output_fmt' is the destination format\n"
147            "  'options' is a comma separated list of format specific options in a\n"
148            "    name=value format. Use -o ? for an overview of the options supported by the\n"
149            "    used format\n"
150            "  'snapshot_param' is param used for internal snapshot, format\n"
151            "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
152            "    '[ID_OR_NAME]'\n"
153            "  '-c' indicates that target image must be compressed (qcow format only)\n"
154            "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
155            "       new backing file match exactly. The image doesn't need a working\n"
156            "       backing file before rebasing in this case (useful for renaming the\n"
157            "       backing file). For image creation, allow creating without attempting\n"
158            "       to open the backing file.\n"
159            "  '-h' with or without a command shows this help and lists the supported formats\n"
160            "  '-p' show progress of command (only certain commands)\n"
161            "  '-q' use Quiet mode - do not print any output (except errors)\n"
162            "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
163            "       contain only zeros for qemu-img to create a sparse image during\n"
164            "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
165            "       unallocated or zero sectors, and the destination image will always be\n"
166            "       fully allocated\n"
167            "  '--output' takes the format in which the output must be done (human or json)\n"
168            "  '-n' skips the target volume creation (useful if the volume is created\n"
169            "       prior to running qemu-img)\n"
170            "\n"
171            "Parameters to check subcommand:\n"
172            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
173            "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
174            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
175            "       hiding corruption that has already occurred.\n"
176            "\n"
177            "Parameters to convert subcommand:\n"
178            "  '-m' specifies how many coroutines work in parallel during the convert\n"
179            "       process (defaults to 8)\n"
180            "  '-W' allow to write to the target out of order rather than sequential\n"
181            "\n"
182            "Parameters to snapshot subcommand:\n"
183            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
184            "  '-a' applies a snapshot (revert disk to saved state)\n"
185            "  '-c' creates a snapshot\n"
186            "  '-d' deletes a snapshot\n"
187            "  '-l' lists all snapshots in the given image\n"
188            "\n"
189            "Parameters to compare subcommand:\n"
190            "  '-f' first image format\n"
191            "  '-F' second image format\n"
192            "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
193            "\n"
194            "Parameters to dd subcommand:\n"
195            "  'bs=BYTES' read and write up to BYTES bytes at a time "
196            "(default: 512)\n"
197            "  'count=N' copy only N input blocks\n"
198            "  'if=FILE' read from FILE\n"
199            "  'of=FILE' write to FILE\n"
200            "  'skip=N' skip N bs-sized blocks at the start of input\n";
201 
202     printf("%s\nSupported formats:", help_msg);
203     bdrv_iterate_format(format_print, NULL, false);
204     printf("\n\n" QEMU_HELP_BOTTOM "\n");
205     exit(EXIT_SUCCESS);
206 }
207 
208 static QemuOptsList qemu_object_opts = {
209     .name = "object",
210     .implied_opt_name = "qom-type",
211     .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
212     .desc = {
213         { }
214     },
215 };
216 
217 static bool qemu_img_object_print_help(const char *type, QemuOpts *opts)
218 {
219     if (user_creatable_print_help(type, opts)) {
220         exit(0);
221     }
222     return true;
223 }
224 
225 static QemuOptsList qemu_source_opts = {
226     .name = "source",
227     .implied_opt_name = "file",
228     .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
229     .desc = {
230         { }
231     },
232 };
233 
234 static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
235 {
236     int ret = 0;
237     if (!quiet) {
238         va_list args;
239         va_start(args, fmt);
240         ret = vprintf(fmt, args);
241         va_end(args);
242     }
243     return ret;
244 }
245 
246 
247 static int print_block_option_help(const char *filename, const char *fmt)
248 {
249     BlockDriver *drv, *proto_drv;
250     QemuOptsList *create_opts = NULL;
251     Error *local_err = NULL;
252 
253     /* Find driver and parse its options */
254     drv = bdrv_find_format(fmt);
255     if (!drv) {
256         error_report("Unknown file format '%s'", fmt);
257         return 1;
258     }
259 
260     if (!drv->create_opts) {
261         error_report("Format driver '%s' does not support image creation", fmt);
262         return 1;
263     }
264 
265     create_opts = qemu_opts_append(create_opts, drv->create_opts);
266     if (filename) {
267         proto_drv = bdrv_find_protocol(filename, true, &local_err);
268         if (!proto_drv) {
269             error_report_err(local_err);
270             qemu_opts_free(create_opts);
271             return 1;
272         }
273         if (!proto_drv->create_opts) {
274             error_report("Protocol driver '%s' does not support image creation",
275                          proto_drv->format_name);
276             qemu_opts_free(create_opts);
277             return 1;
278         }
279         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
280     }
281 
282     if (filename) {
283         printf("Supported options:\n");
284     } else {
285         printf("Supported %s options:\n", fmt);
286     }
287     qemu_opts_print_help(create_opts, false);
288     qemu_opts_free(create_opts);
289 
290     if (!filename) {
291         printf("\n"
292                "The protocol level may support further options.\n"
293                "Specify the target filename to include those options.\n");
294     }
295 
296     return 0;
297 }
298 
299 
300 static BlockBackend *img_open_opts(const char *optstr,
301                                    QemuOpts *opts, int flags, bool writethrough,
302                                    bool quiet, bool force_share)
303 {
304     QDict *options;
305     Error *local_err = NULL;
306     BlockBackend *blk;
307     options = qemu_opts_to_qdict(opts, NULL);
308     if (force_share) {
309         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
310             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
311             error_report("--force-share/-U conflicts with image options");
312             qobject_unref(options);
313             return NULL;
314         }
315         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
316     }
317     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
318     if (!blk) {
319         error_reportf_err(local_err, "Could not open '%s': ", optstr);
320         return NULL;
321     }
322     blk_set_enable_write_cache(blk, !writethrough);
323 
324     return blk;
325 }
326 
327 static BlockBackend *img_open_file(const char *filename,
328                                    QDict *options,
329                                    const char *fmt, int flags,
330                                    bool writethrough, bool quiet,
331                                    bool force_share)
332 {
333     BlockBackend *blk;
334     Error *local_err = NULL;
335 
336     if (!options) {
337         options = qdict_new();
338     }
339     if (fmt) {
340         qdict_put_str(options, "driver", fmt);
341     }
342 
343     if (force_share) {
344         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
345     }
346     blk = blk_new_open(filename, NULL, options, flags, &local_err);
347     if (!blk) {
348         error_reportf_err(local_err, "Could not open '%s': ", filename);
349         return NULL;
350     }
351     blk_set_enable_write_cache(blk, !writethrough);
352 
353     return blk;
354 }
355 
356 
357 static int img_add_key_secrets(void *opaque,
358                                const char *name, const char *value,
359                                Error **errp)
360 {
361     QDict *options = opaque;
362 
363     if (g_str_has_suffix(name, "key-secret")) {
364         qdict_put_str(options, name, value);
365     }
366 
367     return 0;
368 }
369 
370 
371 static BlockBackend *img_open(bool image_opts,
372                               const char *filename,
373                               const char *fmt, int flags, bool writethrough,
374                               bool quiet, bool force_share)
375 {
376     BlockBackend *blk;
377     if (image_opts) {
378         QemuOpts *opts;
379         if (fmt) {
380             error_report("--image-opts and --format are mutually exclusive");
381             return NULL;
382         }
383         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
384                                        filename, true);
385         if (!opts) {
386             return NULL;
387         }
388         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
389                             force_share);
390     } else {
391         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
392                             force_share);
393     }
394     return blk;
395 }
396 
397 
398 static int add_old_style_options(const char *fmt, QemuOpts *opts,
399                                  const char *base_filename,
400                                  const char *base_fmt)
401 {
402     Error *err = NULL;
403 
404     if (base_filename) {
405         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
406         if (err) {
407             error_report("Backing file not supported for file format '%s'",
408                          fmt);
409             error_free(err);
410             return -1;
411         }
412     }
413     if (base_fmt) {
414         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
415         if (err) {
416             error_report("Backing file format not supported for file "
417                          "format '%s'", fmt);
418             error_free(err);
419             return -1;
420         }
421     }
422     return 0;
423 }
424 
/*
 * Convert the size string @s with qemu_strtosz().
 *
 * Returns the parsed value, the negative error code from qemu_strtosz()
 * if parsing fails, or -ERANGE if the value does not fit in int64_t.
 */
static int64_t cvtnum(const char *s)
{
    uint64_t parsed;
    int rc = qemu_strtosz(s, NULL, &parsed);

    if (rc < 0) {
        return rc;
    }
    return parsed > INT64_MAX ? -ERANGE : (int64_t)parsed;
}
439 
440 static int img_create(int argc, char **argv)
441 {
442     int c;
443     uint64_t img_size = -1;
444     const char *fmt = "raw";
445     const char *base_fmt = NULL;
446     const char *filename;
447     const char *base_filename = NULL;
448     char *options = NULL;
449     Error *local_err = NULL;
450     bool quiet = false;
451     int flags = 0;
452 
453     for(;;) {
454         static const struct option long_options[] = {
455             {"help", no_argument, 0, 'h'},
456             {"object", required_argument, 0, OPTION_OBJECT},
457             {0, 0, 0, 0}
458         };
459         c = getopt_long(argc, argv, ":F:b:f:ho:qu",
460                         long_options, NULL);
461         if (c == -1) {
462             break;
463         }
464         switch(c) {
465         case ':':
466             missing_argument(argv[optind - 1]);
467             break;
468         case '?':
469             unrecognized_option(argv[optind - 1]);
470             break;
471         case 'h':
472             help();
473             break;
474         case 'F':
475             base_fmt = optarg;
476             break;
477         case 'b':
478             base_filename = optarg;
479             break;
480         case 'f':
481             fmt = optarg;
482             break;
483         case 'o':
484             if (!is_valid_option_list(optarg)) {
485                 error_report("Invalid option list: %s", optarg);
486                 goto fail;
487             }
488             if (!options) {
489                 options = g_strdup(optarg);
490             } else {
491                 char *old_options = options;
492                 options = g_strdup_printf("%s,%s", options, optarg);
493                 g_free(old_options);
494             }
495             break;
496         case 'q':
497             quiet = true;
498             break;
499         case 'u':
500             flags |= BDRV_O_NO_BACKING;
501             break;
502         case OPTION_OBJECT: {
503             QemuOpts *opts;
504             opts = qemu_opts_parse_noisily(&qemu_object_opts,
505                                            optarg, true);
506             if (!opts) {
507                 goto fail;
508             }
509         }   break;
510         }
511     }
512 
513     /* Get the filename */
514     filename = (optind < argc) ? argv[optind] : NULL;
515     if (options && has_help_option(options)) {
516         g_free(options);
517         return print_block_option_help(filename, fmt);
518     }
519 
520     if (optind >= argc) {
521         error_exit("Expecting image file name");
522     }
523     optind++;
524 
525     if (qemu_opts_foreach(&qemu_object_opts,
526                           user_creatable_add_opts_foreach,
527                           qemu_img_object_print_help, &error_fatal)) {
528         goto fail;
529     }
530 
531     /* Get image size, if specified */
532     if (optind < argc) {
533         int64_t sval;
534 
535         sval = cvtnum(argv[optind++]);
536         if (sval < 0) {
537             if (sval == -ERANGE) {
538                 error_report("Image size must be less than 8 EiB!");
539             } else {
540                 error_report("Invalid image size specified! You may use k, M, "
541                       "G, T, P or E suffixes for ");
542                 error_report("kilobytes, megabytes, gigabytes, terabytes, "
543                              "petabytes and exabytes.");
544             }
545             goto fail;
546         }
547         img_size = (uint64_t)sval;
548     }
549     if (optind != argc) {
550         error_exit("Unexpected argument: %s", argv[optind]);
551     }
552 
553     bdrv_img_create(filename, fmt, base_filename, base_fmt,
554                     options, img_size, flags, quiet, &local_err);
555     if (local_err) {
556         error_reportf_err(local_err, "%s: ", filename);
557         goto fail;
558     }
559 
560     g_free(options);
561     return 0;
562 
563 fail:
564     g_free(options);
565     return 1;
566 }
567 
568 static void dump_json_image_check(ImageCheck *check, bool quiet)
569 {
570     QString *str;
571     QObject *obj;
572     Visitor *v = qobject_output_visitor_new(&obj);
573 
574     visit_type_ImageCheck(v, NULL, &check, &error_abort);
575     visit_complete(v, &obj);
576     str = qobject_to_json_pretty(obj);
577     assert(str != NULL);
578     qprintf(quiet, "%s\n", qstring_get_str(str));
579     qobject_unref(obj);
580     visit_free(v);
581     qobject_unref(str);
582 }
583 
584 static void dump_human_image_check(ImageCheck *check, bool quiet)
585 {
586     if (!(check->corruptions || check->leaks || check->check_errors)) {
587         qprintf(quiet, "No errors were found on the image.\n");
588     } else {
589         if (check->corruptions) {
590             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
591                     "Data may be corrupted, or further writes to the image "
592                     "may corrupt it.\n",
593                     check->corruptions);
594         }
595 
596         if (check->leaks) {
597             qprintf(quiet,
598                     "\n%" PRId64 " leaked clusters were found on the image.\n"
599                     "This means waste of disk space, but no harm to data.\n",
600                     check->leaks);
601         }
602 
603         if (check->check_errors) {
604             qprintf(quiet,
605                     "\n%" PRId64
606                     " internal errors have occurred during the check.\n",
607                     check->check_errors);
608         }
609     }
610 
611     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
612         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
613                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
614                 check->allocated_clusters, check->total_clusters,
615                 check->allocated_clusters * 100.0 / check->total_clusters,
616                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
617                 check->compressed_clusters * 100.0 /
618                 check->allocated_clusters);
619     }
620 
621     if (check->image_end_offset) {
622         qprintf(quiet,
623                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
624     }
625 }
626 
627 static int collect_image_check(BlockDriverState *bs,
628                    ImageCheck *check,
629                    const char *filename,
630                    const char *fmt,
631                    int fix)
632 {
633     int ret;
634     BdrvCheckResult result;
635 
636     ret = bdrv_check(bs, &result, fix);
637     if (ret < 0) {
638         return ret;
639     }
640 
641     check->filename                 = g_strdup(filename);
642     check->format                   = g_strdup(bdrv_get_format_name(bs));
643     check->check_errors             = result.check_errors;
644     check->corruptions              = result.corruptions;
645     check->has_corruptions          = result.corruptions != 0;
646     check->leaks                    = result.leaks;
647     check->has_leaks                = result.leaks != 0;
648     check->corruptions_fixed        = result.corruptions_fixed;
649     check->has_corruptions_fixed    = result.corruptions != 0;
650     check->leaks_fixed              = result.leaks_fixed;
651     check->has_leaks_fixed          = result.leaks != 0;
652     check->image_end_offset         = result.image_end_offset;
653     check->has_image_end_offset     = result.image_end_offset != 0;
654     check->total_clusters           = result.bfi.total_clusters;
655     check->has_total_clusters       = result.bfi.total_clusters != 0;
656     check->allocated_clusters       = result.bfi.allocated_clusters;
657     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
658     check->fragmented_clusters      = result.bfi.fragmented_clusters;
659     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
660     check->compressed_clusters      = result.bfi.compressed_clusters;
661     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
662 
663     return 0;
664 }
665 
666 /*
667  * Checks an image for consistency. Exit codes:
668  *
669  *  0 - Check completed, image is good
670  *  1 - Check not completed because of internal errors
671  *  2 - Check completed, image is corrupted
672  *  3 - Check completed, image has leaked clusters, but is good otherwise
673  * 63 - Checks are not supported by the image format
674  */
675 static int img_check(int argc, char **argv)
676 {
677     int c, ret;
678     OutputFormat output_format = OFORMAT_HUMAN;
679     const char *filename, *fmt, *output, *cache;
680     BlockBackend *blk;
681     BlockDriverState *bs;
682     int fix = 0;
683     int flags = BDRV_O_CHECK;
684     bool writethrough;
685     ImageCheck *check;
686     bool quiet = false;
687     bool image_opts = false;
688     bool force_share = false;
689 
690     fmt = NULL;
691     output = NULL;
692     cache = BDRV_DEFAULT_CACHE;
693 
694     for(;;) {
695         int option_index = 0;
696         static const struct option long_options[] = {
697             {"help", no_argument, 0, 'h'},
698             {"format", required_argument, 0, 'f'},
699             {"repair", required_argument, 0, 'r'},
700             {"output", required_argument, 0, OPTION_OUTPUT},
701             {"object", required_argument, 0, OPTION_OBJECT},
702             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
703             {"force-share", no_argument, 0, 'U'},
704             {0, 0, 0, 0}
705         };
706         c = getopt_long(argc, argv, ":hf:r:T:qU",
707                         long_options, &option_index);
708         if (c == -1) {
709             break;
710         }
711         switch(c) {
712         case ':':
713             missing_argument(argv[optind - 1]);
714             break;
715         case '?':
716             unrecognized_option(argv[optind - 1]);
717             break;
718         case 'h':
719             help();
720             break;
721         case 'f':
722             fmt = optarg;
723             break;
724         case 'r':
725             flags |= BDRV_O_RDWR;
726 
727             if (!strcmp(optarg, "leaks")) {
728                 fix = BDRV_FIX_LEAKS;
729             } else if (!strcmp(optarg, "all")) {
730                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
731             } else {
732                 error_exit("Unknown option value for -r "
733                            "(expecting 'leaks' or 'all'): %s", optarg);
734             }
735             break;
736         case OPTION_OUTPUT:
737             output = optarg;
738             break;
739         case 'T':
740             cache = optarg;
741             break;
742         case 'q':
743             quiet = true;
744             break;
745         case 'U':
746             force_share = true;
747             break;
748         case OPTION_OBJECT: {
749             QemuOpts *opts;
750             opts = qemu_opts_parse_noisily(&qemu_object_opts,
751                                            optarg, true);
752             if (!opts) {
753                 return 1;
754             }
755         }   break;
756         case OPTION_IMAGE_OPTS:
757             image_opts = true;
758             break;
759         }
760     }
761     if (optind != argc - 1) {
762         error_exit("Expecting one image file name");
763     }
764     filename = argv[optind++];
765 
766     if (output && !strcmp(output, "json")) {
767         output_format = OFORMAT_JSON;
768     } else if (output && !strcmp(output, "human")) {
769         output_format = OFORMAT_HUMAN;
770     } else if (output) {
771         error_report("--output must be used with human or json as argument.");
772         return 1;
773     }
774 
775     if (qemu_opts_foreach(&qemu_object_opts,
776                           user_creatable_add_opts_foreach,
777                           qemu_img_object_print_help, &error_fatal)) {
778         return 1;
779     }
780 
781     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
782     if (ret < 0) {
783         error_report("Invalid source cache option: %s", cache);
784         return 1;
785     }
786 
787     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
788                    force_share);
789     if (!blk) {
790         return 1;
791     }
792     bs = blk_bs(blk);
793 
794     check = g_new0(ImageCheck, 1);
795     ret = collect_image_check(bs, check, filename, fmt, fix);
796 
797     if (ret == -ENOTSUP) {
798         error_report("This image format does not support checks");
799         ret = 63;
800         goto fail;
801     }
802 
803     if (check->corruptions_fixed || check->leaks_fixed) {
804         int corruptions_fixed, leaks_fixed;
805 
806         leaks_fixed         = check->leaks_fixed;
807         corruptions_fixed   = check->corruptions_fixed;
808 
809         if (output_format == OFORMAT_HUMAN) {
810             qprintf(quiet,
811                     "The following inconsistencies were found and repaired:\n\n"
812                     "    %" PRId64 " leaked clusters\n"
813                     "    %" PRId64 " corruptions\n\n"
814                     "Double checking the fixed image now...\n",
815                     check->leaks_fixed,
816                     check->corruptions_fixed);
817         }
818 
819         ret = collect_image_check(bs, check, filename, fmt, 0);
820 
821         check->leaks_fixed          = leaks_fixed;
822         check->corruptions_fixed    = corruptions_fixed;
823     }
824 
825     if (!ret) {
826         switch (output_format) {
827         case OFORMAT_HUMAN:
828             dump_human_image_check(check, quiet);
829             break;
830         case OFORMAT_JSON:
831             dump_json_image_check(check, quiet);
832             break;
833         }
834     }
835 
836     if (ret || check->check_errors) {
837         if (ret) {
838             error_report("Check failed: %s", strerror(-ret));
839         } else {
840             error_report("Check failed");
841         }
842         ret = 1;
843         goto fail;
844     }
845 
846     if (check->corruptions) {
847         ret = 2;
848     } else if (check->leaks) {
849         ret = 3;
850     } else {
851         ret = 0;
852     }
853 
854 fail:
855     qapi_free_ImageCheck(check);
856     blk_unref(blk);
857     return ret;
858 }
859 
860 typedef struct CommonBlockJobCBInfo {
861     BlockDriverState *bs;
862     Error **errp;
863 } CommonBlockJobCBInfo;
864 
865 static void common_block_job_cb(void *opaque, int ret)
866 {
867     CommonBlockJobCBInfo *cbi = opaque;
868 
869     if (ret < 0) {
870         error_setg_errno(cbi->errp, -ret, "Block job failed");
871     }
872 }
873 
874 static void run_block_job(BlockJob *job, Error **errp)
875 {
876     AioContext *aio_context = blk_get_aio_context(job->blk);
877     int ret = 0;
878 
879     aio_context_acquire(aio_context);
880     job_ref(&job->job);
881     do {
882         float progress = 0.0f;
883         aio_poll(aio_context, true);
884         if (job->job.progress_total) {
885             progress = (float)job->job.progress_current /
886                        job->job.progress_total * 100.f;
887         }
888         qemu_progress_print(progress, 0);
889     } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
890 
891     if (!job_is_completed(&job->job)) {
892         ret = job_complete_sync(&job->job, errp);
893     } else {
894         ret = job->job.ret;
895     }
896     job_unref(&job->job);
897     aio_context_release(aio_context);
898 
899     /* publish completion progress only when success */
900     if (!ret) {
901         qemu_progress_print(100.f, 0);
902     }
903 }
904 
905 static int img_commit(int argc, char **argv)
906 {
907     int c, ret, flags;
908     const char *filename, *fmt, *cache, *base;
909     BlockBackend *blk;
910     BlockDriverState *bs, *base_bs;
911     BlockJob *job;
912     bool progress = false, quiet = false, drop = false;
913     bool writethrough;
914     Error *local_err = NULL;
915     CommonBlockJobCBInfo cbi;
916     bool image_opts = false;
917     AioContext *aio_context;
918 
919     fmt = NULL;
920     cache = BDRV_DEFAULT_CACHE;
921     base = NULL;
922     for(;;) {
923         static const struct option long_options[] = {
924             {"help", no_argument, 0, 'h'},
925             {"object", required_argument, 0, OPTION_OBJECT},
926             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
927             {0, 0, 0, 0}
928         };
929         c = getopt_long(argc, argv, ":f:ht:b:dpq",
930                         long_options, NULL);
931         if (c == -1) {
932             break;
933         }
934         switch(c) {
935         case ':':
936             missing_argument(argv[optind - 1]);
937             break;
938         case '?':
939             unrecognized_option(argv[optind - 1]);
940             break;
941         case 'h':
942             help();
943             break;
944         case 'f':
945             fmt = optarg;
946             break;
947         case 't':
948             cache = optarg;
949             break;
950         case 'b':
951             base = optarg;
952             /* -b implies -d */
953             drop = true;
954             break;
955         case 'd':
956             drop = true;
957             break;
958         case 'p':
959             progress = true;
960             break;
961         case 'q':
962             quiet = true;
963             break;
964         case OPTION_OBJECT: {
965             QemuOpts *opts;
966             opts = qemu_opts_parse_noisily(&qemu_object_opts,
967                                            optarg, true);
968             if (!opts) {
969                 return 1;
970             }
971         }   break;
972         case OPTION_IMAGE_OPTS:
973             image_opts = true;
974             break;
975         }
976     }
977 
978     /* Progress is not shown in Quiet mode */
979     if (quiet) {
980         progress = false;
981     }
982 
983     if (optind != argc - 1) {
984         error_exit("Expecting one image file name");
985     }
986     filename = argv[optind++];
987 
988     if (qemu_opts_foreach(&qemu_object_opts,
989                           user_creatable_add_opts_foreach,
990                           qemu_img_object_print_help, &error_fatal)) {
991         return 1;
992     }
993 
994     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
995     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
996     if (ret < 0) {
997         error_report("Invalid cache option: %s", cache);
998         return 1;
999     }
1000 
1001     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1002                    false);
1003     if (!blk) {
1004         return 1;
1005     }
1006     bs = blk_bs(blk);
1007 
1008     qemu_progress_init(progress, 1.f);
1009     qemu_progress_print(0.f, 100);
1010 
1011     if (base) {
1012         base_bs = bdrv_find_backing_image(bs, base);
1013         if (!base_bs) {
1014             error_setg(&local_err,
1015                        "Did not find '%s' in the backing chain of '%s'",
1016                        base, filename);
1017             goto done;
1018         }
1019     } else {
1020         /* This is different from QMP, which by default uses the deepest file in
1021          * the backing chain (i.e., the very base); however, the traditional
1022          * behavior of qemu-img commit is using the immediate backing file. */
1023         base_bs = backing_bs(bs);
1024         if (!base_bs) {
1025             error_setg(&local_err, "Image does not have a backing file");
1026             goto done;
1027         }
1028     }
1029 
1030     cbi = (CommonBlockJobCBInfo){
1031         .errp = &local_err,
1032         .bs   = bs,
1033     };
1034 
1035     aio_context = bdrv_get_aio_context(bs);
1036     aio_context_acquire(aio_context);
1037     commit_active_start("commit", bs, base_bs, JOB_DEFAULT, 0,
1038                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1039                         &cbi, false, &local_err);
1040     aio_context_release(aio_context);
1041     if (local_err) {
1042         goto done;
1043     }
1044 
1045     /* When the block job completes, the BlockBackend reference will point to
1046      * the old backing file. In order to avoid that the top image is already
1047      * deleted, so we can still empty it afterwards, increment the reference
1048      * counter here preemptively. */
1049     if (!drop) {
1050         bdrv_ref(bs);
1051     }
1052 
1053     job = block_job_get("commit");
1054     assert(job);
1055     run_block_job(job, &local_err);
1056     if (local_err) {
1057         goto unref_backing;
1058     }
1059 
1060     if (!drop && bs->drv->bdrv_make_empty) {
1061         ret = bs->drv->bdrv_make_empty(bs);
1062         if (ret) {
1063             error_setg_errno(&local_err, -ret, "Could not empty %s",
1064                              filename);
1065             goto unref_backing;
1066         }
1067     }
1068 
1069 unref_backing:
1070     if (!drop) {
1071         bdrv_unref(bs);
1072     }
1073 
1074 done:
1075     qemu_progress_end();
1076 
1077     blk_unref(blk);
1078 
1079     if (local_err) {
1080         error_report_err(local_err);
1081         return 1;
1082     }
1083 
1084     qprintf(quiet, "Image committed.\n");
1085     return 0;
1086 }
1087 
1088 /*
1089  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1090  * of the first sector boundary within buf where the sector contains a
1091  * non-zero byte.  This function is robust to a buffer that is not
1092  * sector-aligned.
1093  */
1094 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1095 {
1096     int64_t i;
1097     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1098 
1099     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1100         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1101             return i;
1102         }
1103     }
1104     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1105         return i;
1106     }
1107     return -1;
1108 }
1109 
1110 /*
1111  * Returns true iff the first sector pointed to by 'buf' contains at least
1112  * a non-NUL byte.
1113  *
1114  * 'pnum' is set to the number of sectors (including and immediately following
1115  * the first one) that are known to be in the same allocated/unallocated state.
1116  * The function will try to align the end offset to alignment boundaries so
1117  * that the request will at least end aligned and consequtive requests will
1118  * also start at an aligned offset.
1119  */
1120 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1121                                 int64_t sector_num, int alignment)
1122 {
1123     bool is_zero;
1124     int i, tail;
1125 
1126     if (n <= 0) {
1127         *pnum = 0;
1128         return 0;
1129     }
1130     is_zero = buffer_is_zero(buf, 512);
1131     for(i = 1; i < n; i++) {
1132         buf += 512;
1133         if (is_zero != buffer_is_zero(buf, 512)) {
1134             break;
1135         }
1136     }
1137 
1138     tail = (sector_num + i) & (alignment - 1);
1139     if (tail) {
1140         if (is_zero && i <= tail) {
1141             /* treat unallocated areas which only consist
1142              * of a small tail as allocated. */
1143             is_zero = false;
1144         }
1145         if (!is_zero) {
1146             /* align up end offset of allocated areas. */
1147             i += alignment - tail;
1148             i = MIN(i, n);
1149         } else {
1150             /* align down end offset of zero areas. */
1151             i -= tail;
1152         }
1153     }
1154     *pnum = i;
1155     return !is_zero;
1156 }
1157 
1158 /*
1159  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1160  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1161  * breaking up write requests for only small sparse areas.
1162  */
1163 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1164     int min, int64_t sector_num, int alignment)
1165 {
1166     int ret;
1167     int num_checked, num_used;
1168 
1169     if (n < min) {
1170         min = n;
1171     }
1172 
1173     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1174     if (!ret) {
1175         return ret;
1176     }
1177 
1178     num_used = *pnum;
1179     buf += BDRV_SECTOR_SIZE * *pnum;
1180     n -= *pnum;
1181     sector_num += *pnum;
1182     num_checked = num_used;
1183 
1184     while (n > 0) {
1185         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1186 
1187         buf += BDRV_SECTOR_SIZE * *pnum;
1188         n -= *pnum;
1189         sector_num += *pnum;
1190         num_checked += *pnum;
1191         if (ret) {
1192             num_used = num_checked;
1193         } else if (*pnum >= min) {
1194             break;
1195         }
1196     }
1197 
1198     *pnum = num_used;
1199     return 1;
1200 }
1201 
1202 /*
1203  * Compares two buffers sector by sector. Returns 0 if the first
1204  * sector of each buffer matches, non-zero otherwise.
1205  *
1206  * pnum is set to the sector-aligned size of the buffer prefix that
1207  * has the same matching status as the first sector.
1208  */
1209 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1210                            int64_t bytes, int64_t *pnum)
1211 {
1212     bool res;
1213     int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
1214 
1215     assert(bytes > 0);
1216 
1217     res = !!memcmp(buf1, buf2, i);
1218     while (i < bytes) {
1219         int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
1220 
1221         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1222             break;
1223         }
1224         i += len;
1225     }
1226 
1227     *pnum = i;
1228     return res;
1229 }
1230 
/*
 * Size in bytes of the bounce buffers used for image I/O; img_compare()
 * allocates its buffers with this size and never reads more than this
 * many bytes in one request.
 */
#define IO_BUF_SIZE (2 * MiB)
1232 
1233 /*
1234  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1235  *
1236  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1237  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1238  * failure), and 4 on error (the exit status for read errors), after emitting
1239  * an error message.
1240  *
1241  * @param blk:  BlockBackend for the image
1242  * @param offset: Starting offset to check
1243  * @param bytes: Number of bytes to check
1244  * @param filename: Name of disk file we are checking (logging purpose)
1245  * @param buffer: Allocated buffer for storing read data
1246  * @param quiet: Flag for quiet mode
1247  */
1248 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1249                                int64_t bytes, const char *filename,
1250                                uint8_t *buffer, bool quiet)
1251 {
1252     int ret = 0;
1253     int64_t idx;
1254 
1255     ret = blk_pread(blk, offset, buffer, bytes);
1256     if (ret < 0) {
1257         error_report("Error while reading offset %" PRId64 " of %s: %s",
1258                      offset, filename, strerror(-ret));
1259         return 4;
1260     }
1261     idx = find_nonzero(buffer, bytes);
1262     if (idx >= 0) {
1263         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1264                 offset + idx);
1265         return 1;
1266     }
1267 
1268     return 0;
1269 }
1270 
1271 /*
1272  * Compares two images. Exit codes:
1273  *
1274  * 0 - Images are identical
1275  * 1 - Images differ
1276  * >1 - Error occurred
1277  */
1278 static int img_compare(int argc, char **argv)
1279 {
1280     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1281     BlockBackend *blk1, *blk2;
1282     BlockDriverState *bs1, *bs2;
1283     int64_t total_size1, total_size2;
1284     uint8_t *buf1 = NULL, *buf2 = NULL;
1285     int64_t pnum1, pnum2;
1286     int allocated1, allocated2;
1287     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1288     bool progress = false, quiet = false, strict = false;
1289     int flags;
1290     bool writethrough;
1291     int64_t total_size;
1292     int64_t offset = 0;
1293     int64_t chunk;
1294     int c;
1295     uint64_t progress_base;
1296     bool image_opts = false;
1297     bool force_share = false;
1298 
1299     cache = BDRV_DEFAULT_CACHE;
1300     for (;;) {
1301         static const struct option long_options[] = {
1302             {"help", no_argument, 0, 'h'},
1303             {"object", required_argument, 0, OPTION_OBJECT},
1304             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1305             {"force-share", no_argument, 0, 'U'},
1306             {0, 0, 0, 0}
1307         };
1308         c = getopt_long(argc, argv, ":hf:F:T:pqsU",
1309                         long_options, NULL);
1310         if (c == -1) {
1311             break;
1312         }
1313         switch (c) {
1314         case ':':
1315             missing_argument(argv[optind - 1]);
1316             break;
1317         case '?':
1318             unrecognized_option(argv[optind - 1]);
1319             break;
1320         case 'h':
1321             help();
1322             break;
1323         case 'f':
1324             fmt1 = optarg;
1325             break;
1326         case 'F':
1327             fmt2 = optarg;
1328             break;
1329         case 'T':
1330             cache = optarg;
1331             break;
1332         case 'p':
1333             progress = true;
1334             break;
1335         case 'q':
1336             quiet = true;
1337             break;
1338         case 's':
1339             strict = true;
1340             break;
1341         case 'U':
1342             force_share = true;
1343             break;
1344         case OPTION_OBJECT: {
1345             QemuOpts *opts;
1346             opts = qemu_opts_parse_noisily(&qemu_object_opts,
1347                                            optarg, true);
1348             if (!opts) {
1349                 ret = 2;
1350                 goto out4;
1351             }
1352         }   break;
1353         case OPTION_IMAGE_OPTS:
1354             image_opts = true;
1355             break;
1356         }
1357     }
1358 
1359     /* Progress is not shown in Quiet mode */
1360     if (quiet) {
1361         progress = false;
1362     }
1363 
1364 
1365     if (optind != argc - 2) {
1366         error_exit("Expecting two image file names");
1367     }
1368     filename1 = argv[optind++];
1369     filename2 = argv[optind++];
1370 
1371     if (qemu_opts_foreach(&qemu_object_opts,
1372                           user_creatable_add_opts_foreach,
1373                           qemu_img_object_print_help, &error_fatal)) {
1374         ret = 2;
1375         goto out4;
1376     }
1377 
1378     /* Initialize before goto out */
1379     qemu_progress_init(progress, 2.0);
1380 
1381     flags = 0;
1382     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1383     if (ret < 0) {
1384         error_report("Invalid source cache option: %s", cache);
1385         ret = 2;
1386         goto out3;
1387     }
1388 
1389     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
1390                     force_share);
1391     if (!blk1) {
1392         ret = 2;
1393         goto out3;
1394     }
1395 
1396     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
1397                     force_share);
1398     if (!blk2) {
1399         ret = 2;
1400         goto out2;
1401     }
1402     bs1 = blk_bs(blk1);
1403     bs2 = blk_bs(blk2);
1404 
1405     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1406     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1407     total_size1 = blk_getlength(blk1);
1408     if (total_size1 < 0) {
1409         error_report("Can't get size of %s: %s",
1410                      filename1, strerror(-total_size1));
1411         ret = 4;
1412         goto out;
1413     }
1414     total_size2 = blk_getlength(blk2);
1415     if (total_size2 < 0) {
1416         error_report("Can't get size of %s: %s",
1417                      filename2, strerror(-total_size2));
1418         ret = 4;
1419         goto out;
1420     }
1421     total_size = MIN(total_size1, total_size2);
1422     progress_base = MAX(total_size1, total_size2);
1423 
1424     qemu_progress_print(0, 100);
1425 
1426     if (strict && total_size1 != total_size2) {
1427         ret = 1;
1428         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1429         goto out;
1430     }
1431 
1432     while (offset < total_size) {
1433         int status1, status2;
1434 
1435         status1 = bdrv_block_status_above(bs1, NULL, offset,
1436                                           total_size1 - offset, &pnum1, NULL,
1437                                           NULL);
1438         if (status1 < 0) {
1439             ret = 3;
1440             error_report("Sector allocation test failed for %s", filename1);
1441             goto out;
1442         }
1443         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1444 
1445         status2 = bdrv_block_status_above(bs2, NULL, offset,
1446                                           total_size2 - offset, &pnum2, NULL,
1447                                           NULL);
1448         if (status2 < 0) {
1449             ret = 3;
1450             error_report("Sector allocation test failed for %s", filename2);
1451             goto out;
1452         }
1453         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1454 
1455         assert(pnum1 && pnum2);
1456         chunk = MIN(pnum1, pnum2);
1457 
1458         if (strict) {
1459             if (status1 != status2) {
1460                 ret = 1;
1461                 qprintf(quiet, "Strict mode: Offset %" PRId64
1462                         " block status mismatch!\n", offset);
1463                 goto out;
1464             }
1465         }
1466         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1467             /* nothing to do */
1468         } else if (allocated1 == allocated2) {
1469             if (allocated1) {
1470                 int64_t pnum;
1471 
1472                 chunk = MIN(chunk, IO_BUF_SIZE);
1473                 ret = blk_pread(blk1, offset, buf1, chunk);
1474                 if (ret < 0) {
1475                     error_report("Error while reading offset %" PRId64
1476                                  " of %s: %s",
1477                                  offset, filename1, strerror(-ret));
1478                     ret = 4;
1479                     goto out;
1480                 }
1481                 ret = blk_pread(blk2, offset, buf2, chunk);
1482                 if (ret < 0) {
1483                     error_report("Error while reading offset %" PRId64
1484                                  " of %s: %s",
1485                                  offset, filename2, strerror(-ret));
1486                     ret = 4;
1487                     goto out;
1488                 }
1489                 ret = compare_buffers(buf1, buf2, chunk, &pnum);
1490                 if (ret || pnum != chunk) {
1491                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1492                             offset + (ret ? 0 : pnum));
1493                     ret = 1;
1494                     goto out;
1495                 }
1496             }
1497         } else {
1498             chunk = MIN(chunk, IO_BUF_SIZE);
1499             if (allocated1) {
1500                 ret = check_empty_sectors(blk1, offset, chunk,
1501                                           filename1, buf1, quiet);
1502             } else {
1503                 ret = check_empty_sectors(blk2, offset, chunk,
1504                                           filename2, buf1, quiet);
1505             }
1506             if (ret) {
1507                 goto out;
1508             }
1509         }
1510         offset += chunk;
1511         qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1512     }
1513 
1514     if (total_size1 != total_size2) {
1515         BlockBackend *blk_over;
1516         const char *filename_over;
1517 
1518         qprintf(quiet, "Warning: Image size mismatch!\n");
1519         if (total_size1 > total_size2) {
1520             blk_over = blk1;
1521             filename_over = filename1;
1522         } else {
1523             blk_over = blk2;
1524             filename_over = filename2;
1525         }
1526 
1527         while (offset < progress_base) {
1528             ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
1529                                           progress_base - offset, &chunk,
1530                                           NULL, NULL);
1531             if (ret < 0) {
1532                 ret = 3;
1533                 error_report("Sector allocation test failed for %s",
1534                              filename_over);
1535                 goto out;
1536 
1537             }
1538             if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
1539                 chunk = MIN(chunk, IO_BUF_SIZE);
1540                 ret = check_empty_sectors(blk_over, offset, chunk,
1541                                           filename_over, buf1, quiet);
1542                 if (ret) {
1543                     goto out;
1544                 }
1545             }
1546             offset += chunk;
1547             qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1548         }
1549     }
1550 
1551     qprintf(quiet, "Images are identical.\n");
1552     ret = 0;
1553 
1554 out:
1555     qemu_vfree(buf1);
1556     qemu_vfree(buf2);
1557     blk_unref(blk2);
1558 out2:
1559     blk_unref(blk1);
1560 out3:
1561     qemu_progress_end();
1562 out4:
1563     return ret;
1564 }
1565 
/*
 * How 'qemu-img convert' has to treat a range of the source, as decided by
 * convert_iteration_sectors() and acted upon by convert_co_write().
 */
enum ImgConvertBlockStatus {
    /* Data has to be read; it is written unless zero detection elides it */
    BLK_DATA         = 0,
    /* Range reads as zeroes; zeroes are written unless has_zero_init */
    BLK_ZERO         = 1,
    /* Leave the range unallocated so the target's backing file shows */
    BLK_BACKING_FILE = 2,
};
1571 
/* Capacity of the per-coroutine arrays (co[], wait_sector_num[]) in
 * ImgConvertState. */
#define MAX_COROUTINES 16
1573 
/*
 * State shared by the 'qemu-img convert' helpers.  Only fields whose use is
 * visible in the helpers next to this definition are commented.
 */
typedef struct ImgConvertState {
    BlockBackend **src;         /* source images, treated as one concatenation */
    int64_t *src_sectors;       /* length of each source, in sectors */
    int src_num;                /* number of entries in src[]/src_sectors[] */
    int64_t total_sectors;      /* sector numbers handled must stay below this */
    int64_t allocated_sectors;
    int64_t allocated_done;
    int64_t sector_num;
    int64_t wr_offs;
    /* Status of the range last examined by convert_iteration_sectors() */
    enum ImgConvertBlockStatus status;
    /* First sector for which 'status' has to be queried again */
    int64_t sector_next_status;
    BlockBackend *target;       /* image written by convert_co_write() */
    bool has_zero_init;         /* if set, zero ranges are not written */
    bool compressed;            /* write with BDRV_REQ_WRITE_COMPRESSED */
    /* Decides whether zero ranges past the end of the target's backing file
     * are handled as BLK_BACKING_FILE instead of BLK_ZERO */
    bool unallocated_blocks_are_zero;
    bool target_is_new;
    bool target_has_backing;    /* query only the top source layer; ranges
                                 * may be left as BLK_BACKING_FILE */
    int64_t target_backing_sectors; /* negative if unknown */
    bool wr_in_order;
    bool copy_range;
    bool salvage;               /* tolerate block status and read errors */
    bool quiet;                 /* suppress warnings emitted while salvaging */
    int min_sparse;             /* 0: always write data; otherwise passed as
                                 * 'min' to is_allocated_sectors_min() */
    int alignment;              /* passed to is_allocated_sectors_min() */
    size_t cluster_sectors;     /* request granularity when compressed */
    size_t buf_sectors;         /* upper bound for one data request */
    long num_coroutines;
    int running_coroutines;
    Coroutine *co[MAX_COROUTINES];
    int64_t wait_sector_num[MAX_COROUTINES];
    CoMutex lock;
    int ret;
} ImgConvertState;
1607 
1608 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1609                                 int *src_cur, int64_t *src_cur_offset)
1610 {
1611     *src_cur = 0;
1612     *src_cur_offset = 0;
1613     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1614         *src_cur_offset += s->src_sectors[*src_cur];
1615         (*src_cur)++;
1616         assert(*src_cur < s->src_num);
1617     }
1618 }
1619 
1620 static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1621 {
1622     int64_t src_cur_offset;
1623     int ret, n, src_cur;
1624     bool post_backing_zero = false;
1625 
1626     convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1627 
1628     assert(s->total_sectors > sector_num);
1629     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1630 
1631     if (s->target_backing_sectors >= 0) {
1632         if (sector_num >= s->target_backing_sectors) {
1633             post_backing_zero = s->unallocated_blocks_are_zero;
1634         } else if (sector_num + n > s->target_backing_sectors) {
1635             /* Split requests around target_backing_sectors (because
1636              * starting from there, zeros are handled differently) */
1637             n = s->target_backing_sectors - sector_num;
1638         }
1639     }
1640 
1641     if (s->sector_next_status <= sector_num) {
1642         uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
1643         int64_t count;
1644 
1645         do {
1646             count = n * BDRV_SECTOR_SIZE;
1647 
1648             if (s->target_has_backing) {
1649                 ret = bdrv_block_status(blk_bs(s->src[src_cur]), offset,
1650                                         count, &count, NULL, NULL);
1651             } else {
1652                 ret = bdrv_block_status_above(blk_bs(s->src[src_cur]), NULL,
1653                                               offset, count, &count, NULL,
1654                                               NULL);
1655             }
1656 
1657             if (ret < 0) {
1658                 if (s->salvage) {
1659                     if (n == 1) {
1660                         if (!s->quiet) {
1661                             warn_report("error while reading block status at "
1662                                         "offset %" PRIu64 ": %s", offset,
1663                                         strerror(-ret));
1664                         }
1665                         /* Just try to read the data, then */
1666                         ret = BDRV_BLOCK_DATA;
1667                         count = BDRV_SECTOR_SIZE;
1668                     } else {
1669                         /* Retry on a shorter range */
1670                         n = DIV_ROUND_UP(n, 4);
1671                     }
1672                 } else {
1673                     error_report("error while reading block status at offset "
1674                                  "%" PRIu64 ": %s", offset, strerror(-ret));
1675                     return ret;
1676                 }
1677             }
1678         } while (ret < 0);
1679 
1680         n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
1681 
1682         if (ret & BDRV_BLOCK_ZERO) {
1683             s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
1684         } else if (ret & BDRV_BLOCK_DATA) {
1685             s->status = BLK_DATA;
1686         } else {
1687             s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
1688         }
1689 
1690         s->sector_next_status = sector_num + n;
1691     }
1692 
1693     n = MIN(n, s->sector_next_status - sector_num);
1694     if (s->status == BLK_DATA) {
1695         n = MIN(n, s->buf_sectors);
1696     }
1697 
1698     /* We need to write complete clusters for compressed images, so if an
1699      * unallocated area is shorter than that, we must consider the whole
1700      * cluster allocated. */
1701     if (s->compressed) {
1702         if (n < s->cluster_sectors) {
1703             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1704             s->status = BLK_DATA;
1705         } else {
1706             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1707         }
1708     }
1709 
1710     return n;
1711 }
1712 
1713 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1714                                         int nb_sectors, uint8_t *buf)
1715 {
1716     uint64_t single_read_until = 0;
1717     int n, ret;
1718 
1719     assert(nb_sectors <= s->buf_sectors);
1720     while (nb_sectors > 0) {
1721         BlockBackend *blk;
1722         int src_cur;
1723         int64_t bs_sectors, src_cur_offset;
1724         uint64_t offset;
1725 
1726         /* In the case of compression with multiple source files, we can get a
1727          * nb_sectors that spreads into the next part. So we must be able to
1728          * read across multiple BDSes for one convert_read() call. */
1729         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1730         blk = s->src[src_cur];
1731         bs_sectors = s->src_sectors[src_cur];
1732 
1733         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1734 
1735         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1736         if (single_read_until > offset) {
1737             n = 1;
1738         }
1739 
1740         ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1741         if (ret < 0) {
1742             if (s->salvage) {
1743                 if (n > 1) {
1744                     single_read_until = offset + (n << BDRV_SECTOR_BITS);
1745                     continue;
1746                 } else {
1747                     if (!s->quiet) {
1748                         warn_report("error while reading offset %" PRIu64
1749                                     ": %s", offset, strerror(-ret));
1750                     }
1751                     memset(buf, 0, BDRV_SECTOR_SIZE);
1752                 }
1753             } else {
1754                 return ret;
1755             }
1756         }
1757 
1758         sector_num += n;
1759         nb_sectors -= n;
1760         buf += n * BDRV_SECTOR_SIZE;
1761     }
1762 
1763     return 0;
1764 }
1765 
1766 
1767 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1768                                          int nb_sectors, uint8_t *buf,
1769                                          enum ImgConvertBlockStatus status)
1770 {
1771     int ret;
1772 
1773     while (nb_sectors > 0) {
1774         int n = nb_sectors;
1775         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1776 
1777         switch (status) {
1778         case BLK_BACKING_FILE:
1779             /* If we have a backing file, leave clusters unallocated that are
1780              * unallocated in the source image, so that the backing file is
1781              * visible at the respective offset. */
1782             assert(s->target_has_backing);
1783             break;
1784 
1785         case BLK_DATA:
1786             /* If we're told to keep the target fully allocated (-S 0) or there
1787              * is real non-zero data, we must write it. Otherwise we can treat
1788              * it as zero sectors.
1789              * Compressed clusters need to be written as a whole, so in that
1790              * case we can only save the write if the buffer is completely
1791              * zeroed. */
1792             if (!s->min_sparse ||
1793                 (!s->compressed &&
1794                  is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1795                                           sector_num, s->alignment)) ||
1796                 (s->compressed &&
1797                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1798             {
1799                 ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1800                                     n << BDRV_SECTOR_BITS, buf, flags);
1801                 if (ret < 0) {
1802                     return ret;
1803                 }
1804                 break;
1805             }
1806             /* fall-through */
1807 
1808         case BLK_ZERO:
1809             if (s->has_zero_init) {
1810                 assert(!s->target_has_backing);
1811                 break;
1812             }
1813             ret = blk_co_pwrite_zeroes(s->target,
1814                                        sector_num << BDRV_SECTOR_BITS,
1815                                        n << BDRV_SECTOR_BITS,
1816                                        BDRV_REQ_MAY_UNMAP);
1817             if (ret < 0) {
1818                 return ret;
1819             }
1820             break;
1821         }
1822 
1823         sector_num += n;
1824         nb_sectors -= n;
1825         buf += n * BDRV_SECTOR_SIZE;
1826     }
1827 
1828     return 0;
1829 }
1830 
1831 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1832                                               int nb_sectors)
1833 {
1834     int n, ret;
1835 
1836     while (nb_sectors > 0) {
1837         BlockBackend *blk;
1838         int src_cur;
1839         int64_t bs_sectors, src_cur_offset;
1840         int64_t offset;
1841 
1842         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1843         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1844         blk = s->src[src_cur];
1845         bs_sectors = s->src_sectors[src_cur];
1846 
1847         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1848 
1849         ret = blk_co_copy_range(blk, offset, s->target,
1850                                 sector_num << BDRV_SECTOR_BITS,
1851                                 n << BDRV_SECTOR_BITS, 0, 0);
1852         if (ret < 0) {
1853             return ret;
1854         }
1855 
1856         sector_num += n;
1857         nb_sectors -= n;
1858     }
1859     return 0;
1860 }
1861 
1862 static void coroutine_fn convert_co_do_copy(void *opaque)
1863 {
1864     ImgConvertState *s = opaque;
1865     uint8_t *buf = NULL;
1866     int ret, i;
1867     int index = -1;
1868 
1869     for (i = 0; i < s->num_coroutines; i++) {
1870         if (s->co[i] == qemu_coroutine_self()) {
1871             index = i;
1872             break;
1873         }
1874     }
1875     assert(index >= 0);
1876 
1877     s->running_coroutines++;
1878     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1879 
1880     while (1) {
1881         int n;
1882         int64_t sector_num;
1883         enum ImgConvertBlockStatus status;
1884         bool copy_range;
1885 
1886         qemu_co_mutex_lock(&s->lock);
1887         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1888             qemu_co_mutex_unlock(&s->lock);
1889             break;
1890         }
1891         n = convert_iteration_sectors(s, s->sector_num);
1892         if (n < 0) {
1893             qemu_co_mutex_unlock(&s->lock);
1894             s->ret = n;
1895             break;
1896         }
1897         /* save current sector and allocation status to local variables */
1898         sector_num = s->sector_num;
1899         status = s->status;
1900         if (!s->min_sparse && s->status == BLK_ZERO) {
1901             n = MIN(n, s->buf_sectors);
1902         }
1903         /* increment global sector counter so that other coroutines can
1904          * already continue reading beyond this request */
1905         s->sector_num += n;
1906         qemu_co_mutex_unlock(&s->lock);
1907 
1908         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
1909             s->allocated_done += n;
1910             qemu_progress_print(100.0 * s->allocated_done /
1911                                         s->allocated_sectors, 0);
1912         }
1913 
1914 retry:
1915         copy_range = s->copy_range && s->status == BLK_DATA;
1916         if (status == BLK_DATA && !copy_range) {
1917             ret = convert_co_read(s, sector_num, n, buf);
1918             if (ret < 0) {
1919                 error_report("error while reading sector %" PRId64
1920                              ": %s", sector_num, strerror(-ret));
1921                 s->ret = ret;
1922             }
1923         } else if (!s->min_sparse && status == BLK_ZERO) {
1924             status = BLK_DATA;
1925             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
1926         }
1927 
1928         if (s->wr_in_order) {
1929             /* keep writes in order */
1930             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
1931                 s->wait_sector_num[index] = sector_num;
1932                 qemu_coroutine_yield();
1933             }
1934             s->wait_sector_num[index] = -1;
1935         }
1936 
1937         if (s->ret == -EINPROGRESS) {
1938             if (copy_range) {
1939                 ret = convert_co_copy_range(s, sector_num, n);
1940                 if (ret) {
1941                     s->copy_range = false;
1942                     goto retry;
1943                 }
1944             } else {
1945                 ret = convert_co_write(s, sector_num, n, buf, status);
1946             }
1947             if (ret < 0) {
1948                 error_report("error while writing sector %" PRId64
1949                              ": %s", sector_num, strerror(-ret));
1950                 s->ret = ret;
1951             }
1952         }
1953 
1954         if (s->wr_in_order) {
1955             /* reenter the coroutine that might have waited
1956              * for this write to complete */
1957             s->wr_offs = sector_num + n;
1958             for (i = 0; i < s->num_coroutines; i++) {
1959                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
1960                     /*
1961                      * A -> B -> A cannot occur because A has
1962                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
1963                      * B will never enter A during this time window.
1964                      */
1965                     qemu_coroutine_enter(s->co[i]);
1966                     break;
1967                 }
1968             }
1969         }
1970     }
1971 
1972     qemu_vfree(buf);
1973     s->co[index] = NULL;
1974     s->running_coroutines--;
1975     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
1976         /* the convert job finished successfully */
1977         s->ret = 0;
1978     }
1979 }
1980 
1981 static int convert_do_copy(ImgConvertState *s)
1982 {
1983     int ret, i, n;
1984     int64_t sector_num = 0;
1985 
1986     /* Check whether we have zero initialisation or can get it efficiently */
1987     if (s->target_is_new && s->min_sparse && !s->target_has_backing) {
1988         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
1989     } else {
1990         s->has_zero_init = false;
1991     }
1992 
1993     if (!s->has_zero_init && !s->target_has_backing &&
1994         bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
1995     {
1996         ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
1997         if (ret == 0) {
1998             s->has_zero_init = true;
1999         }
2000     }
2001 
2002     /* Allocate buffer for copied data. For compressed images, only one cluster
2003      * can be copied at a time. */
2004     if (s->compressed) {
2005         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2006             error_report("invalid cluster size");
2007             return -EINVAL;
2008         }
2009         s->buf_sectors = s->cluster_sectors;
2010     }
2011 
2012     while (sector_num < s->total_sectors) {
2013         n = convert_iteration_sectors(s, sector_num);
2014         if (n < 0) {
2015             return n;
2016         }
2017         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2018         {
2019             s->allocated_sectors += n;
2020         }
2021         sector_num += n;
2022     }
2023 
2024     /* Do the copy */
2025     s->sector_next_status = 0;
2026     s->ret = -EINPROGRESS;
2027 
2028     qemu_co_mutex_init(&s->lock);
2029     for (i = 0; i < s->num_coroutines; i++) {
2030         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2031         s->wait_sector_num[i] = -1;
2032         qemu_coroutine_enter(s->co[i]);
2033     }
2034 
2035     while (s->running_coroutines) {
2036         main_loop_wait(false);
2037     }
2038 
2039     if (s->compressed && !s->ret) {
2040         /* signal EOF to align */
2041         ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
2042         if (ret < 0) {
2043             return ret;
2044         }
2045     }
2046 
2047     return s->ret;
2048 }
2049 
2050 #define MAX_BUF_SECTORS 32768
2051 
2052 static int img_convert(int argc, char **argv)
2053 {
2054     int c, bs_i, flags, src_flags = 0;
2055     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2056                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2057                *out_filename, *out_baseimg_param, *snapshot_name = NULL;
2058     BlockDriver *drv = NULL, *proto_drv = NULL;
2059     BlockDriverInfo bdi;
2060     BlockDriverState *out_bs;
2061     QemuOpts *opts = NULL, *sn_opts = NULL;
2062     QemuOptsList *create_opts = NULL;
2063     QDict *open_opts = NULL;
2064     char *options = NULL;
2065     Error *local_err = NULL;
2066     bool writethrough, src_writethrough, image_opts = false,
2067          skip_create = false, progress = false, tgt_image_opts = false;
2068     int64_t ret = -EINVAL;
2069     bool force_share = false;
2070     bool explict_min_sparse = false;
2071 
2072     ImgConvertState s = (ImgConvertState) {
2073         /* Need at least 4k of zeros for sparse detection */
2074         .min_sparse         = 8,
2075         .copy_range         = false,
2076         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2077         .wr_in_order        = true,
2078         .num_coroutines     = 8,
2079     };
2080 
2081     for(;;) {
2082         static const struct option long_options[] = {
2083             {"help", no_argument, 0, 'h'},
2084             {"object", required_argument, 0, OPTION_OBJECT},
2085             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2086             {"force-share", no_argument, 0, 'U'},
2087             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2088             {"salvage", no_argument, 0, OPTION_SALVAGE},
2089             {0, 0, 0, 0}
2090         };
2091         c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU",
2092                         long_options, NULL);
2093         if (c == -1) {
2094             break;
2095         }
2096         switch(c) {
2097         case ':':
2098             missing_argument(argv[optind - 1]);
2099             break;
2100         case '?':
2101             unrecognized_option(argv[optind - 1]);
2102             break;
2103         case 'h':
2104             help();
2105             break;
2106         case 'f':
2107             fmt = optarg;
2108             break;
2109         case 'O':
2110             out_fmt = optarg;
2111             break;
2112         case 'B':
2113             out_baseimg = optarg;
2114             break;
2115         case 'C':
2116             s.copy_range = true;
2117             break;
2118         case 'c':
2119             s.compressed = true;
2120             break;
2121         case 'o':
2122             if (!is_valid_option_list(optarg)) {
2123                 error_report("Invalid option list: %s", optarg);
2124                 goto fail_getopt;
2125             }
2126             if (!options) {
2127                 options = g_strdup(optarg);
2128             } else {
2129                 char *old_options = options;
2130                 options = g_strdup_printf("%s,%s", options, optarg);
2131                 g_free(old_options);
2132             }
2133             break;
2134         case 'l':
2135             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2136                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2137                                                   optarg, false);
2138                 if (!sn_opts) {
2139                     error_report("Failed in parsing snapshot param '%s'",
2140                                  optarg);
2141                     goto fail_getopt;
2142                 }
2143             } else {
2144                 snapshot_name = optarg;
2145             }
2146             break;
2147         case 'S':
2148         {
2149             int64_t sval;
2150 
2151             sval = cvtnum(optarg);
2152             if (sval < 0 || !QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2153                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2154                 error_report("Invalid buffer size for sparse output specified. "
2155                     "Valid sizes are multiples of %llu up to %llu. Select "
2156                     "0 to disable sparse detection (fully allocates output).",
2157                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2158                 goto fail_getopt;
2159             }
2160 
2161             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2162             explict_min_sparse = true;
2163             break;
2164         }
2165         case 'p':
2166             progress = true;
2167             break;
2168         case 't':
2169             cache = optarg;
2170             break;
2171         case 'T':
2172             src_cache = optarg;
2173             break;
2174         case 'q':
2175             s.quiet = true;
2176             break;
2177         case 'n':
2178             skip_create = true;
2179             break;
2180         case 'm':
2181             if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2182                 s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2183                 error_report("Invalid number of coroutines. Allowed number of"
2184                              " coroutines is between 1 and %d", MAX_COROUTINES);
2185                 goto fail_getopt;
2186             }
2187             break;
2188         case 'W':
2189             s.wr_in_order = false;
2190             break;
2191         case 'U':
2192             force_share = true;
2193             break;
2194         case OPTION_OBJECT: {
2195             QemuOpts *object_opts;
2196             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
2197                                                   optarg, true);
2198             if (!object_opts) {
2199                 goto fail_getopt;
2200             }
2201             break;
2202         }
2203         case OPTION_IMAGE_OPTS:
2204             image_opts = true;
2205             break;
2206         case OPTION_SALVAGE:
2207             s.salvage = true;
2208             break;
2209         case OPTION_TARGET_IMAGE_OPTS:
2210             tgt_image_opts = true;
2211             break;
2212         }
2213     }
2214 
2215     if (!out_fmt && !tgt_image_opts) {
2216         out_fmt = "raw";
2217     }
2218 
2219     if (qemu_opts_foreach(&qemu_object_opts,
2220                           user_creatable_add_opts_foreach,
2221                           qemu_img_object_print_help, &error_fatal)) {
2222         goto fail_getopt;
2223     }
2224 
2225     if (s.compressed && s.copy_range) {
2226         error_report("Cannot enable copy offloading when -c is used");
2227         goto fail_getopt;
2228     }
2229 
2230     if (explict_min_sparse && s.copy_range) {
2231         error_report("Cannot enable copy offloading when -S is used");
2232         goto fail_getopt;
2233     }
2234 
2235     if (s.copy_range && s.salvage) {
2236         error_report("Cannot use copy offloading in salvaging mode");
2237         goto fail_getopt;
2238     }
2239 
2240     if (tgt_image_opts && !skip_create) {
2241         error_report("--target-image-opts requires use of -n flag");
2242         goto fail_getopt;
2243     }
2244 
2245     if (skip_create && options) {
2246         warn_report("-o has no effect when skipping image creation");
2247         warn_report("This will become an error in future QEMU versions.");
2248     }
2249 
2250     s.src_num = argc - optind - 1;
2251     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2252 
2253     if (options && has_help_option(options)) {
2254         if (out_fmt) {
2255             ret = print_block_option_help(out_filename, out_fmt);
2256             goto fail_getopt;
2257         } else {
2258             error_report("Option help requires a format be specified");
2259             goto fail_getopt;
2260         }
2261     }
2262 
2263     if (s.src_num < 1) {
2264         error_report("Must specify image file name");
2265         goto fail_getopt;
2266     }
2267 
2268 
2269     /* ret is still -EINVAL until here */
2270     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2271     if (ret < 0) {
2272         error_report("Invalid source cache option: %s", src_cache);
2273         goto fail_getopt;
2274     }
2275 
2276     /* Initialize before goto out */
2277     if (s.quiet) {
2278         progress = false;
2279     }
2280     qemu_progress_init(progress, 1.0);
2281     qemu_progress_print(0, 100);
2282 
2283     s.src = g_new0(BlockBackend *, s.src_num);
2284     s.src_sectors = g_new(int64_t, s.src_num);
2285 
2286     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2287         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2288                                fmt, src_flags, src_writethrough, s.quiet,
2289                                force_share);
2290         if (!s.src[bs_i]) {
2291             ret = -1;
2292             goto out;
2293         }
2294         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2295         if (s.src_sectors[bs_i] < 0) {
2296             error_report("Could not get size of %s: %s",
2297                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2298             ret = -1;
2299             goto out;
2300         }
2301         s.total_sectors += s.src_sectors[bs_i];
2302     }
2303 
2304     if (sn_opts) {
2305         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2306                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2307                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2308                                &local_err);
2309     } else if (snapshot_name != NULL) {
2310         if (s.src_num > 1) {
2311             error_report("No support for concatenating multiple snapshot");
2312             ret = -1;
2313             goto out;
2314         }
2315 
2316         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2317                                              &local_err);
2318     }
2319     if (local_err) {
2320         error_reportf_err(local_err, "Failed to load snapshot: ");
2321         ret = -1;
2322         goto out;
2323     }
2324 
2325     if (!skip_create) {
2326         /* Find driver and parse its options */
2327         drv = bdrv_find_format(out_fmt);
2328         if (!drv) {
2329             error_report("Unknown file format '%s'", out_fmt);
2330             ret = -1;
2331             goto out;
2332         }
2333 
2334         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2335         if (!proto_drv) {
2336             error_report_err(local_err);
2337             ret = -1;
2338             goto out;
2339         }
2340 
2341         if (!drv->create_opts) {
2342             error_report("Format driver '%s' does not support image creation",
2343                          drv->format_name);
2344             ret = -1;
2345             goto out;
2346         }
2347 
2348         if (!proto_drv->create_opts) {
2349             error_report("Protocol driver '%s' does not support image creation",
2350                          proto_drv->format_name);
2351             ret = -1;
2352             goto out;
2353         }
2354 
2355         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2356         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2357 
2358         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2359         if (options) {
2360             qemu_opts_do_parse(opts, options, NULL, &local_err);
2361             if (local_err) {
2362                 error_report_err(local_err);
2363                 ret = -1;
2364                 goto out;
2365             }
2366         }
2367 
2368         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s.total_sectors * 512,
2369                             &error_abort);
2370         ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
2371         if (ret < 0) {
2372             goto out;
2373         }
2374     }
2375 
2376     /* Get backing file name if -o backing_file was used */
2377     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2378     if (out_baseimg_param) {
2379         out_baseimg = out_baseimg_param;
2380     }
2381     s.target_has_backing = (bool) out_baseimg;
2382 
2383     if (s.src_num > 1 && out_baseimg) {
2384         error_report("Having a backing file for the target makes no sense when "
2385                      "concatenating multiple input images");
2386         ret = -1;
2387         goto out;
2388     }
2389 
2390     /* Check if compression is supported */
2391     if (s.compressed) {
2392         bool encryption =
2393             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2394         const char *encryptfmt =
2395             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2396         const char *preallocation =
2397             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2398 
2399         if (drv && !block_driver_can_compress(drv)) {
2400             error_report("Compression not supported for this file format");
2401             ret = -1;
2402             goto out;
2403         }
2404 
2405         if (encryption || encryptfmt) {
2406             error_report("Compression and encryption not supported at "
2407                          "the same time");
2408             ret = -1;
2409             goto out;
2410         }
2411 
2412         if (preallocation
2413             && strcmp(preallocation, "off"))
2414         {
2415             error_report("Compression and preallocation not supported at "
2416                          "the same time");
2417             ret = -1;
2418             goto out;
2419         }
2420     }
2421 
2422     /*
2423      * The later open call will need any decryption secrets, and
2424      * bdrv_create() will purge "opts", so extract them now before
2425      * they are lost.
2426      */
2427     if (!skip_create) {
2428         open_opts = qdict_new();
2429         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2430     }
2431 
2432     if (!skip_create) {
2433         /* Create the new image */
2434         ret = bdrv_create(drv, out_filename, opts, &local_err);
2435         if (ret < 0) {
2436             error_reportf_err(local_err, "%s: error while converting %s: ",
2437                               out_filename, out_fmt);
2438             goto out;
2439         }
2440     }
2441 
2442     s.target_is_new = !skip_create;
2443 
2444     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2445     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2446     if (ret < 0) {
2447         error_report("Invalid cache option: %s", cache);
2448         goto out;
2449     }
2450 
2451     if (skip_create) {
2452         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2453                             flags, writethrough, s.quiet, false);
2454     } else {
2455         /* TODO ultimately we should allow --target-image-opts
2456          * to be used even when -n is not given.
2457          * That has to wait for bdrv_create to be improved
2458          * to allow filenames in option syntax
2459          */
2460         s.target = img_open_file(out_filename, open_opts, out_fmt,
2461                                  flags, writethrough, s.quiet, false);
2462         open_opts = NULL; /* blk_new_open will have freed it */
2463     }
2464     if (!s.target) {
2465         ret = -1;
2466         goto out;
2467     }
2468     out_bs = blk_bs(s.target);
2469 
2470     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2471         error_report("Compression not supported for this file format");
2472         ret = -1;
2473         goto out;
2474     }
2475 
2476     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2477      * or discard_alignment of the out_bs is greater. Limit to
2478      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2479     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2480                         MAX(s.buf_sectors,
2481                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2482                                 out_bs->bl.pdiscard_alignment >>
2483                                 BDRV_SECTOR_BITS)));
2484 
2485     /* try to align the write requests to the destination to avoid unnecessary
2486      * RMW cycles. */
2487     s.alignment = MAX(pow2floor(s.min_sparse),
2488                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2489                                    BDRV_SECTOR_SIZE));
2490     assert(is_power_of_2(s.alignment));
2491 
2492     if (skip_create) {
2493         int64_t output_sectors = blk_nb_sectors(s.target);
2494         if (output_sectors < 0) {
2495             error_report("unable to get output image length: %s",
2496                          strerror(-output_sectors));
2497             ret = -1;
2498             goto out;
2499         } else if (output_sectors < s.total_sectors) {
2500             error_report("output file is smaller than input file");
2501             ret = -1;
2502             goto out;
2503         }
2504     }
2505 
2506     if (s.target_has_backing) {
2507         /* Errors are treated as "backing length unknown" (which means
2508          * s.target_backing_sectors has to be negative, which it will
2509          * be automatically).  The backing file length is used only
2510          * for optimizations, so such a case is not fatal. */
2511         s.target_backing_sectors = bdrv_nb_sectors(out_bs->backing->bs);
2512     } else {
2513         s.target_backing_sectors = -1;
2514     }
2515 
2516     ret = bdrv_get_info(out_bs, &bdi);
2517     if (ret < 0) {
2518         if (s.compressed) {
2519             error_report("could not get block driver info");
2520             goto out;
2521         }
2522     } else {
2523         s.compressed = s.compressed || bdi.needs_compressed_writes;
2524         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2525         s.unallocated_blocks_are_zero = bdi.unallocated_blocks_are_zero;
2526     }
2527 
2528     ret = convert_do_copy(&s);
2529 out:
2530     if (!ret) {
2531         qemu_progress_print(100, 0);
2532     }
2533     qemu_progress_end();
2534     qemu_opts_del(opts);
2535     qemu_opts_free(create_opts);
2536     qemu_opts_del(sn_opts);
2537     qobject_unref(open_opts);
2538     blk_unref(s.target);
2539     if (s.src) {
2540         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2541             blk_unref(s.src[bs_i]);
2542         }
2543         g_free(s.src);
2544     }
2545     g_free(s.src_sectors);
2546 fail_getopt:
2547     g_free(options);
2548 
2549     return !!ret;
2550 }
2551 
2552 
2553 static void dump_snapshots(BlockDriverState *bs)
2554 {
2555     QEMUSnapshotInfo *sn_tab, *sn;
2556     int nb_sns, i;
2557 
2558     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2559     if (nb_sns <= 0)
2560         return;
2561     printf("Snapshot list:\n");
2562     bdrv_snapshot_dump(NULL);
2563     printf("\n");
2564     for(i = 0; i < nb_sns; i++) {
2565         sn = &sn_tab[i];
2566         bdrv_snapshot_dump(sn);
2567         printf("\n");
2568     }
2569     g_free(sn_tab);
2570 }
2571 
2572 static void dump_json_image_info_list(ImageInfoList *list)
2573 {
2574     QString *str;
2575     QObject *obj;
2576     Visitor *v = qobject_output_visitor_new(&obj);
2577 
2578     visit_type_ImageInfoList(v, NULL, &list, &error_abort);
2579     visit_complete(v, &obj);
2580     str = qobject_to_json_pretty(obj);
2581     assert(str != NULL);
2582     printf("%s\n", qstring_get_str(str));
2583     qobject_unref(obj);
2584     visit_free(v);
2585     qobject_unref(str);
2586 }
2587 
2588 static void dump_json_image_info(ImageInfo *info)
2589 {
2590     QString *str;
2591     QObject *obj;
2592     Visitor *v = qobject_output_visitor_new(&obj);
2593 
2594     visit_type_ImageInfo(v, NULL, &info, &error_abort);
2595     visit_complete(v, &obj);
2596     str = qobject_to_json_pretty(obj);
2597     assert(str != NULL);
2598     printf("%s\n", qstring_get_str(str));
2599     qobject_unref(obj);
2600     visit_free(v);
2601     qobject_unref(str);
2602 }
2603 
2604 static void dump_human_image_info_list(ImageInfoList *list)
2605 {
2606     ImageInfoList *elem;
2607     bool delim = false;
2608 
2609     for (elem = list; elem; elem = elem->next) {
2610         if (delim) {
2611             printf("\n");
2612         }
2613         delim = true;
2614 
2615         bdrv_image_info_dump(elem->value);
2616     }
2617 }
2618 
2619 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2620 {
2621     return strcmp(a, b) == 0;
2622 }
2623 
2624 /**
2625  * Open an image file chain and return an ImageInfoList
2626  *
2627  * @filename: topmost image filename
2628  * @fmt: topmost image format (may be NULL to autodetect)
2629  * @chain: true  - enumerate entire backing file chain
2630  *         false - only topmost image file
2631  *
2632  * Returns a list of ImageInfo objects or NULL if there was an error opening an
2633  * image file.  If there was an error a message will have been printed to
2634  * stderr.
2635  */
2636 static ImageInfoList *collect_image_info_list(bool image_opts,
2637                                               const char *filename,
2638                                               const char *fmt,
2639                                               bool chain, bool force_share)
2640 {
2641     ImageInfoList *head = NULL;
2642     ImageInfoList **last = &head;
2643     GHashTable *filenames;
2644     Error *err = NULL;
2645 
2646     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2647 
2648     while (filename) {
2649         BlockBackend *blk;
2650         BlockDriverState *bs;
2651         ImageInfo *info;
2652         ImageInfoList *elem;
2653 
2654         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2655             error_report("Backing file '%s' creates an infinite loop.",
2656                          filename);
2657             goto err;
2658         }
2659         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2660 
2661         blk = img_open(image_opts, filename, fmt,
2662                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
2663                        force_share);
2664         if (!blk) {
2665             goto err;
2666         }
2667         bs = blk_bs(blk);
2668 
2669         bdrv_query_image_info(bs, &info, &err);
2670         if (err) {
2671             error_report_err(err);
2672             blk_unref(blk);
2673             goto err;
2674         }
2675 
2676         elem = g_new0(ImageInfoList, 1);
2677         elem->value = info;
2678         *last = elem;
2679         last = &elem->next;
2680 
2681         blk_unref(blk);
2682 
2683         filename = fmt = NULL;
2684         if (chain) {
2685             if (info->has_full_backing_filename) {
2686                 filename = info->full_backing_filename;
2687             } else if (info->has_backing_filename) {
2688                 error_report("Could not determine absolute backing filename,"
2689                              " but backing filename '%s' present",
2690                              info->backing_filename);
2691                 goto err;
2692             }
2693             if (info->has_backing_filename_format) {
2694                 fmt = info->backing_filename_format;
2695             }
2696         }
2697     }
2698     g_hash_table_destroy(filenames);
2699     return head;
2700 
2701 err:
2702     qapi_free_ImageInfoList(head);
2703     g_hash_table_destroy(filenames);
2704     return NULL;
2705 }
2706 
2707 static int img_info(int argc, char **argv)
2708 {
2709     int c;
2710     OutputFormat output_format = OFORMAT_HUMAN;
2711     bool chain = false;
2712     const char *filename, *fmt, *output;
2713     ImageInfoList *list;
2714     bool image_opts = false;
2715     bool force_share = false;
2716 
2717     fmt = NULL;
2718     output = NULL;
2719     for(;;) {
2720         int option_index = 0;
2721         static const struct option long_options[] = {
2722             {"help", no_argument, 0, 'h'},
2723             {"format", required_argument, 0, 'f'},
2724             {"output", required_argument, 0, OPTION_OUTPUT},
2725             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
2726             {"object", required_argument, 0, OPTION_OBJECT},
2727             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2728             {"force-share", no_argument, 0, 'U'},
2729             {0, 0, 0, 0}
2730         };
2731         c = getopt_long(argc, argv, ":f:hU",
2732                         long_options, &option_index);
2733         if (c == -1) {
2734             break;
2735         }
2736         switch(c) {
2737         case ':':
2738             missing_argument(argv[optind - 1]);
2739             break;
2740         case '?':
2741             unrecognized_option(argv[optind - 1]);
2742             break;
2743         case 'h':
2744             help();
2745             break;
2746         case 'f':
2747             fmt = optarg;
2748             break;
2749         case 'U':
2750             force_share = true;
2751             break;
2752         case OPTION_OUTPUT:
2753             output = optarg;
2754             break;
2755         case OPTION_BACKING_CHAIN:
2756             chain = true;
2757             break;
2758         case OPTION_OBJECT: {
2759             QemuOpts *opts;
2760             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2761                                            optarg, true);
2762             if (!opts) {
2763                 return 1;
2764             }
2765         }   break;
2766         case OPTION_IMAGE_OPTS:
2767             image_opts = true;
2768             break;
2769         }
2770     }
2771     if (optind != argc - 1) {
2772         error_exit("Expecting one image file name");
2773     }
2774     filename = argv[optind++];
2775 
2776     if (output && !strcmp(output, "json")) {
2777         output_format = OFORMAT_JSON;
2778     } else if (output && !strcmp(output, "human")) {
2779         output_format = OFORMAT_HUMAN;
2780     } else if (output) {
2781         error_report("--output must be used with human or json as argument.");
2782         return 1;
2783     }
2784 
2785     if (qemu_opts_foreach(&qemu_object_opts,
2786                           user_creatable_add_opts_foreach,
2787                           qemu_img_object_print_help, &error_fatal)) {
2788         return 1;
2789     }
2790 
2791     list = collect_image_info_list(image_opts, filename, fmt, chain,
2792                                    force_share);
2793     if (!list) {
2794         return 1;
2795     }
2796 
2797     switch (output_format) {
2798     case OFORMAT_HUMAN:
2799         dump_human_image_info_list(list);
2800         break;
2801     case OFORMAT_JSON:
2802         if (chain) {
2803             dump_json_image_info_list(list);
2804         } else {
2805             dump_json_image_info(list->value);
2806         }
2807         break;
2808     }
2809 
2810     qapi_free_ImageInfoList(list);
2811     return 0;
2812 }
2813 
2814 static int dump_map_entry(OutputFormat output_format, MapEntry *e,
2815                           MapEntry *next)
2816 {
2817     switch (output_format) {
2818     case OFORMAT_HUMAN:
2819         if (e->data && !e->has_offset) {
2820             error_report("File contains external, encrypted or compressed clusters.");
2821             return -1;
2822         }
2823         if (e->data && !e->zero) {
2824             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
2825                    e->start, e->length,
2826                    e->has_offset ? e->offset : 0,
2827                    e->has_filename ? e->filename : "");
2828         }
2829         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
2830          * Modify the flags here to allow more coalescing.
2831          */
2832         if (next && (!next->data || next->zero)) {
2833             next->data = false;
2834             next->zero = true;
2835         }
2836         break;
2837     case OFORMAT_JSON:
2838         printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
2839                " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
2840                (e->start == 0 ? "[" : ",\n"),
2841                e->start, e->length, e->depth,
2842                e->zero ? "true" : "false",
2843                e->data ? "true" : "false");
2844         if (e->has_offset) {
2845             printf(", \"offset\": %"PRId64"", e->offset);
2846         }
2847         putchar('}');
2848 
2849         if (!next) {
2850             printf("]\n");
2851         }
2852         break;
2853     }
2854     return 0;
2855 }
2856 
2857 static int get_block_status(BlockDriverState *bs, int64_t offset,
2858                             int64_t bytes, MapEntry *e)
2859 {
2860     int ret;
2861     int depth;
2862     BlockDriverState *file;
2863     bool has_offset;
2864     int64_t map;
2865     char *filename = NULL;
2866 
2867     /* As an optimization, we could cache the current range of unallocated
2868      * clusters in each file of the chain, and avoid querying the same
2869      * range repeatedly.
2870      */
2871 
2872     depth = 0;
2873     for (;;) {
2874         ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
2875         if (ret < 0) {
2876             return ret;
2877         }
2878         assert(bytes);
2879         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
2880             break;
2881         }
2882         bs = backing_bs(bs);
2883         if (bs == NULL) {
2884             ret = 0;
2885             break;
2886         }
2887 
2888         depth++;
2889     }
2890 
2891     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
2892 
2893     if (file && has_offset) {
2894         bdrv_refresh_filename(file);
2895         filename = file->filename;
2896     }
2897 
2898     *e = (MapEntry) {
2899         .start = offset,
2900         .length = bytes,
2901         .data = !!(ret & BDRV_BLOCK_DATA),
2902         .zero = !!(ret & BDRV_BLOCK_ZERO),
2903         .offset = map,
2904         .has_offset = has_offset,
2905         .depth = depth,
2906         .has_filename = filename,
2907         .filename = filename,
2908     };
2909 
2910     return 0;
2911 }
2912 
2913 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
2914 {
2915     if (curr->length == 0) {
2916         return false;
2917     }
2918     if (curr->zero != next->zero ||
2919         curr->data != next->data ||
2920         curr->depth != next->depth ||
2921         curr->has_filename != next->has_filename ||
2922         curr->has_offset != next->has_offset) {
2923         return false;
2924     }
2925     if (curr->has_filename && strcmp(curr->filename, next->filename)) {
2926         return false;
2927     }
2928     if (curr->has_offset && curr->offset + curr->length != next->offset) {
2929         return false;
2930     }
2931     return true;
2932 }
2933 
2934 static int img_map(int argc, char **argv)
2935 {
2936     int c;
2937     OutputFormat output_format = OFORMAT_HUMAN;
2938     BlockBackend *blk;
2939     BlockDriverState *bs;
2940     const char *filename, *fmt, *output;
2941     int64_t length;
2942     MapEntry curr = { .length = 0 }, next;
2943     int ret = 0;
2944     bool image_opts = false;
2945     bool force_share = false;
2946 
2947     fmt = NULL;
2948     output = NULL;
2949     for (;;) {
2950         int option_index = 0;
2951         static const struct option long_options[] = {
2952             {"help", no_argument, 0, 'h'},
2953             {"format", required_argument, 0, 'f'},
2954             {"output", required_argument, 0, OPTION_OUTPUT},
2955             {"object", required_argument, 0, OPTION_OBJECT},
2956             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2957             {"force-share", no_argument, 0, 'U'},
2958             {0, 0, 0, 0}
2959         };
2960         c = getopt_long(argc, argv, ":f:hU",
2961                         long_options, &option_index);
2962         if (c == -1) {
2963             break;
2964         }
2965         switch (c) {
2966         case ':':
2967             missing_argument(argv[optind - 1]);
2968             break;
2969         case '?':
2970             unrecognized_option(argv[optind - 1]);
2971             break;
2972         case 'h':
2973             help();
2974             break;
2975         case 'f':
2976             fmt = optarg;
2977             break;
2978         case 'U':
2979             force_share = true;
2980             break;
2981         case OPTION_OUTPUT:
2982             output = optarg;
2983             break;
2984         case OPTION_OBJECT: {
2985             QemuOpts *opts;
2986             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2987                                            optarg, true);
2988             if (!opts) {
2989                 return 1;
2990             }
2991         }   break;
2992         case OPTION_IMAGE_OPTS:
2993             image_opts = true;
2994             break;
2995         }
2996     }
2997     if (optind != argc - 1) {
2998         error_exit("Expecting one image file name");
2999     }
3000     filename = argv[optind];
3001 
3002     if (output && !strcmp(output, "json")) {
3003         output_format = OFORMAT_JSON;
3004     } else if (output && !strcmp(output, "human")) {
3005         output_format = OFORMAT_HUMAN;
3006     } else if (output) {
3007         error_report("--output must be used with human or json as argument.");
3008         return 1;
3009     }
3010 
3011     if (qemu_opts_foreach(&qemu_object_opts,
3012                           user_creatable_add_opts_foreach,
3013                           qemu_img_object_print_help, &error_fatal)) {
3014         return 1;
3015     }
3016 
3017     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3018     if (!blk) {
3019         return 1;
3020     }
3021     bs = blk_bs(blk);
3022 
3023     if (output_format == OFORMAT_HUMAN) {
3024         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3025     }
3026 
3027     length = blk_getlength(blk);
3028     while (curr.start + curr.length < length) {
3029         int64_t offset = curr.start + curr.length;
3030         int64_t n;
3031 
3032         /* Probe up to 1 GiB at a time.  */
3033         n = MIN(1 * GiB, length - offset);
3034         ret = get_block_status(bs, offset, n, &next);
3035 
3036         if (ret < 0) {
3037             error_report("Could not read file metadata: %s", strerror(-ret));
3038             goto out;
3039         }
3040 
3041         if (entry_mergeable(&curr, &next)) {
3042             curr.length += next.length;
3043             continue;
3044         }
3045 
3046         if (curr.length > 0) {
3047             ret = dump_map_entry(output_format, &curr, &next);
3048             if (ret < 0) {
3049                 goto out;
3050             }
3051         }
3052         curr = next;
3053     }
3054 
3055     ret = dump_map_entry(output_format, &curr, NULL);
3056 
3057 out:
3058     blk_unref(blk);
3059     return ret < 0;
3060 }
3061 
3062 #define SNAPSHOT_LIST   1
3063 #define SNAPSHOT_CREATE 2
3064 #define SNAPSHOT_APPLY  3
3065 #define SNAPSHOT_DELETE 4
3066 
3067 static int img_snapshot(int argc, char **argv)
3068 {
3069     BlockBackend *blk;
3070     BlockDriverState *bs;
3071     QEMUSnapshotInfo sn;
3072     char *filename, *snapshot_name = NULL;
3073     int c, ret = 0, bdrv_oflags;
3074     int action = 0;
3075     qemu_timeval tv;
3076     bool quiet = false;
3077     Error *err = NULL;
3078     bool image_opts = false;
3079     bool force_share = false;
3080 
3081     bdrv_oflags = BDRV_O_RDWR;
3082     /* Parse commandline parameters */
3083     for(;;) {
3084         static const struct option long_options[] = {
3085             {"help", no_argument, 0, 'h'},
3086             {"object", required_argument, 0, OPTION_OBJECT},
3087             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3088             {"force-share", no_argument, 0, 'U'},
3089             {0, 0, 0, 0}
3090         };
3091         c = getopt_long(argc, argv, ":la:c:d:hqU",
3092                         long_options, NULL);
3093         if (c == -1) {
3094             break;
3095         }
3096         switch(c) {
3097         case ':':
3098             missing_argument(argv[optind - 1]);
3099             break;
3100         case '?':
3101             unrecognized_option(argv[optind - 1]);
3102             break;
3103         case 'h':
3104             help();
3105             return 0;
3106         case 'l':
3107             if (action) {
3108                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3109                 return 0;
3110             }
3111             action = SNAPSHOT_LIST;
3112             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
3113             break;
3114         case 'a':
3115             if (action) {
3116                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3117                 return 0;
3118             }
3119             action = SNAPSHOT_APPLY;
3120             snapshot_name = optarg;
3121             break;
3122         case 'c':
3123             if (action) {
3124                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3125                 return 0;
3126             }
3127             action = SNAPSHOT_CREATE;
3128             snapshot_name = optarg;
3129             break;
3130         case 'd':
3131             if (action) {
3132                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3133                 return 0;
3134             }
3135             action = SNAPSHOT_DELETE;
3136             snapshot_name = optarg;
3137             break;
3138         case 'q':
3139             quiet = true;
3140             break;
3141         case 'U':
3142             force_share = true;
3143             break;
3144         case OPTION_OBJECT: {
3145             QemuOpts *opts;
3146             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3147                                            optarg, true);
3148             if (!opts) {
3149                 return 1;
3150             }
3151         }   break;
3152         case OPTION_IMAGE_OPTS:
3153             image_opts = true;
3154             break;
3155         }
3156     }
3157 
3158     if (optind != argc - 1) {
3159         error_exit("Expecting one image file name");
3160     }
3161     filename = argv[optind++];
3162 
3163     if (qemu_opts_foreach(&qemu_object_opts,
3164                           user_creatable_add_opts_foreach,
3165                           qemu_img_object_print_help, &error_fatal)) {
3166         return 1;
3167     }
3168 
3169     /* Open the image */
3170     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
3171                    force_share);
3172     if (!blk) {
3173         return 1;
3174     }
3175     bs = blk_bs(blk);
3176 
3177     /* Perform the requested action */
3178     switch(action) {
3179     case SNAPSHOT_LIST:
3180         dump_snapshots(bs);
3181         break;
3182 
3183     case SNAPSHOT_CREATE:
3184         memset(&sn, 0, sizeof(sn));
3185         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3186 
3187         qemu_gettimeofday(&tv);
3188         sn.date_sec = tv.tv_sec;
3189         sn.date_nsec = tv.tv_usec * 1000;
3190 
3191         ret = bdrv_snapshot_create(bs, &sn);
3192         if (ret) {
3193             error_report("Could not create snapshot '%s': %d (%s)",
3194                 snapshot_name, ret, strerror(-ret));
3195         }
3196         break;
3197 
3198     case SNAPSHOT_APPLY:
3199         ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3200         if (ret) {
3201             error_reportf_err(err, "Could not apply snapshot '%s': ",
3202                               snapshot_name);
3203         }
3204         break;
3205 
3206     case SNAPSHOT_DELETE:
3207         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3208         if (ret < 0) {
3209             error_report("Could not delete snapshot '%s': snapshot not "
3210                          "found", snapshot_name);
3211             ret = 1;
3212         } else {
3213             ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3214             if (ret < 0) {
3215                 error_reportf_err(err, "Could not delete snapshot '%s': ",
3216                                   snapshot_name);
3217                 ret = 1;
3218             }
3219         }
3220         break;
3221     }
3222 
3223     /* Cleanup */
3224     blk_unref(blk);
3225     if (ret) {
3226         return 1;
3227     }
3228     return 0;
3229 }
3230 
3231 static int img_rebase(int argc, char **argv)
3232 {
3233     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3234     uint8_t *buf_old = NULL;
3235     uint8_t *buf_new = NULL;
3236     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3237     char *filename;
3238     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3239     int c, flags, src_flags, ret;
3240     bool writethrough, src_writethrough;
3241     int unsafe = 0;
3242     bool force_share = false;
3243     int progress = 0;
3244     bool quiet = false;
3245     Error *local_err = NULL;
3246     bool image_opts = false;
3247 
3248     /* Parse commandline parameters */
3249     fmt = NULL;
3250     cache = BDRV_DEFAULT_CACHE;
3251     src_cache = BDRV_DEFAULT_CACHE;
3252     out_baseimg = NULL;
3253     out_basefmt = NULL;
3254     for(;;) {
3255         static const struct option long_options[] = {
3256             {"help", no_argument, 0, 'h'},
3257             {"object", required_argument, 0, OPTION_OBJECT},
3258             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3259             {"force-share", no_argument, 0, 'U'},
3260             {0, 0, 0, 0}
3261         };
3262         c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
3263                         long_options, NULL);
3264         if (c == -1) {
3265             break;
3266         }
3267         switch(c) {
3268         case ':':
3269             missing_argument(argv[optind - 1]);
3270             break;
3271         case '?':
3272             unrecognized_option(argv[optind - 1]);
3273             break;
3274         case 'h':
3275             help();
3276             return 0;
3277         case 'f':
3278             fmt = optarg;
3279             break;
3280         case 'F':
3281             out_basefmt = optarg;
3282             break;
3283         case 'b':
3284             out_baseimg = optarg;
3285             break;
3286         case 'u':
3287             unsafe = 1;
3288             break;
3289         case 'p':
3290             progress = 1;
3291             break;
3292         case 't':
3293             cache = optarg;
3294             break;
3295         case 'T':
3296             src_cache = optarg;
3297             break;
3298         case 'q':
3299             quiet = true;
3300             break;
3301         case OPTION_OBJECT: {
3302             QemuOpts *opts;
3303             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3304                                            optarg, true);
3305             if (!opts) {
3306                 return 1;
3307             }
3308         }   break;
3309         case OPTION_IMAGE_OPTS:
3310             image_opts = true;
3311             break;
3312         case 'U':
3313             force_share = true;
3314             break;
3315         }
3316     }
3317 
3318     if (quiet) {
3319         progress = 0;
3320     }
3321 
3322     if (optind != argc - 1) {
3323         error_exit("Expecting one image file name");
3324     }
3325     if (!unsafe && !out_baseimg) {
3326         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3327     }
3328     filename = argv[optind++];
3329 
3330     if (qemu_opts_foreach(&qemu_object_opts,
3331                           user_creatable_add_opts_foreach,
3332                           qemu_img_object_print_help, &error_fatal)) {
3333         return 1;
3334     }
3335 
3336     qemu_progress_init(progress, 2.0);
3337     qemu_progress_print(0, 100);
3338 
3339     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3340     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3341     if (ret < 0) {
3342         error_report("Invalid cache option: %s", cache);
3343         goto out;
3344     }
3345 
3346     src_flags = 0;
3347     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3348     if (ret < 0) {
3349         error_report("Invalid source cache option: %s", src_cache);
3350         goto out;
3351     }
3352 
3353     /* The source files are opened read-only, don't care about WCE */
3354     assert((src_flags & BDRV_O_RDWR) == 0);
3355     (void) src_writethrough;
3356 
3357     /*
3358      * Open the images.
3359      *
3360      * Ignore the old backing file for unsafe rebase in case we want to correct
3361      * the reference to a renamed or moved backing file.
3362      */
3363     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3364                    false);
3365     if (!blk) {
3366         ret = -1;
3367         goto out;
3368     }
3369     bs = blk_bs(blk);
3370 
3371     if (out_basefmt != NULL) {
3372         if (bdrv_find_format(out_basefmt) == NULL) {
3373             error_report("Invalid format name: '%s'", out_basefmt);
3374             ret = -1;
3375             goto out;
3376         }
3377     }
3378 
3379     /* For safe rebasing we need to compare old and new backing file */
3380     if (!unsafe) {
3381         QDict *options = NULL;
3382         BlockDriverState *base_bs = backing_bs(bs);
3383 
3384         if (base_bs) {
3385             blk_old_backing = blk_new(qemu_get_aio_context(),
3386                                       BLK_PERM_CONSISTENT_READ,
3387                                       BLK_PERM_ALL);
3388             ret = blk_insert_bs(blk_old_backing, base_bs,
3389                                 &local_err);
3390             if (ret < 0) {
3391                 error_reportf_err(local_err,
3392                                   "Could not reuse old backing file '%s': ",
3393                                   base_bs->filename);
3394                 goto out;
3395             }
3396         } else {
3397             blk_old_backing = NULL;
3398         }
3399 
3400         if (out_baseimg[0]) {
3401             const char *overlay_filename;
3402             char *out_real_path;
3403 
3404             options = qdict_new();
3405             if (out_basefmt) {
3406                 qdict_put_str(options, "driver", out_basefmt);
3407             }
3408             if (force_share) {
3409                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3410             }
3411 
3412             bdrv_refresh_filename(bs);
3413             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3414                                                      : bs->filename;
3415             out_real_path =
3416                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
3417                                                              out_baseimg,
3418                                                              &local_err);
3419             if (local_err) {
3420                 qobject_unref(options);
3421                 error_reportf_err(local_err,
3422                                   "Could not resolve backing filename: ");
3423                 ret = -1;
3424                 goto out;
3425             }
3426 
3427             /*
3428              * Find out whether we rebase an image on top of a previous image
3429              * in its chain.
3430              */
3431             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3432             if (prefix_chain_bs) {
3433                 qobject_unref(options);
3434                 g_free(out_real_path);
3435 
3436                 blk_new_backing = blk_new(qemu_get_aio_context(),
3437                                           BLK_PERM_CONSISTENT_READ,
3438                                           BLK_PERM_ALL);
3439                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3440                                     &local_err);
3441                 if (ret < 0) {
3442                     error_reportf_err(local_err,
3443                                       "Could not reuse backing file '%s': ",
3444                                       out_baseimg);
3445                     goto out;
3446                 }
3447             } else {
3448                 blk_new_backing = blk_new_open(out_real_path, NULL,
3449                                                options, src_flags, &local_err);
3450                 g_free(out_real_path);
3451                 if (!blk_new_backing) {
3452                     error_reportf_err(local_err,
3453                                       "Could not open new backing file '%s': ",
3454                                       out_baseimg);
3455                     ret = -1;
3456                     goto out;
3457                 }
3458             }
3459         }
3460     }
3461 
3462     /*
3463      * Check each unallocated cluster in the COW file. If it is unallocated,
3464      * accesses go to the backing file. We must therefore compare this cluster
3465      * in the old and new backing file, and if they differ we need to copy it
3466      * from the old backing file into the COW file.
3467      *
3468      * If qemu-img crashes during this step, no harm is done. The content of
3469      * the image is the same as the original one at any time.
3470      */
3471     if (!unsafe) {
3472         int64_t size;
3473         int64_t old_backing_size = 0;
3474         int64_t new_backing_size = 0;
3475         uint64_t offset;
3476         int64_t n;
3477         float local_progress = 0;
3478 
3479         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3480         buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3481 
3482         size = blk_getlength(blk);
3483         if (size < 0) {
3484             error_report("Could not get size of '%s': %s",
3485                          filename, strerror(-size));
3486             ret = -1;
3487             goto out;
3488         }
3489         if (blk_old_backing) {
3490             old_backing_size = blk_getlength(blk_old_backing);
3491             if (old_backing_size < 0) {
3492                 char backing_name[PATH_MAX];
3493 
3494                 bdrv_get_backing_filename(bs, backing_name,
3495                                           sizeof(backing_name));
3496                 error_report("Could not get size of '%s': %s",
3497                              backing_name, strerror(-old_backing_size));
3498                 ret = -1;
3499                 goto out;
3500             }
3501         }
3502         if (blk_new_backing) {
3503             new_backing_size = blk_getlength(blk_new_backing);
3504             if (new_backing_size < 0) {
3505                 error_report("Could not get size of '%s': %s",
3506                              out_baseimg, strerror(-new_backing_size));
3507                 ret = -1;
3508                 goto out;
3509             }
3510         }
3511 
3512         if (size != 0) {
3513             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3514         }
3515 
3516         for (offset = 0; offset < size; offset += n) {
3517             bool buf_old_is_zero = false;
3518 
3519             /* How many bytes can we handle with the next read? */
3520             n = MIN(IO_BUF_SIZE, size - offset);
3521 
3522             /* If the cluster is allocated, we don't need to take action */
3523             ret = bdrv_is_allocated(bs, offset, n, &n);
3524             if (ret < 0) {
3525                 error_report("error while reading image metadata: %s",
3526                              strerror(-ret));
3527                 goto out;
3528             }
3529             if (ret) {
3530                 continue;
3531             }
3532 
3533             if (prefix_chain_bs) {
3534                 /*
3535                  * If cluster wasn't changed since prefix_chain, we don't need
3536                  * to take action
3537                  */
3538                 ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
3539                                               false, offset, n, &n);
3540                 if (ret < 0) {
3541                     error_report("error while reading image metadata: %s",
3542                                  strerror(-ret));
3543                     goto out;
3544                 }
3545                 if (!ret) {
3546                     continue;
3547                 }
3548             }
3549 
3550             /*
3551              * Read old and new backing file and take into consideration that
3552              * backing files may be smaller than the COW image.
3553              */
3554             if (offset >= old_backing_size) {
3555                 memset(buf_old, 0, n);
3556                 buf_old_is_zero = true;
3557             } else {
3558                 if (offset + n > old_backing_size) {
3559                     n = old_backing_size - offset;
3560                 }
3561 
3562                 ret = blk_pread(blk_old_backing, offset, buf_old, n);
3563                 if (ret < 0) {
3564                     error_report("error while reading from old backing file");
3565                     goto out;
3566                 }
3567             }
3568 
3569             if (offset >= new_backing_size || !blk_new_backing) {
3570                 memset(buf_new, 0, n);
3571             } else {
3572                 if (offset + n > new_backing_size) {
3573                     n = new_backing_size - offset;
3574                 }
3575 
3576                 ret = blk_pread(blk_new_backing, offset, buf_new, n);
3577                 if (ret < 0) {
3578                     error_report("error while reading from new backing file");
3579                     goto out;
3580                 }
3581             }
3582 
3583             /* If they differ, we need to write to the COW file */
3584             uint64_t written = 0;
3585 
3586             while (written < n) {
3587                 int64_t pnum;
3588 
3589                 if (compare_buffers(buf_old + written, buf_new + written,
3590                                     n - written, &pnum))
3591                 {
3592                     if (buf_old_is_zero) {
3593                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
3594                     } else {
3595                         ret = blk_pwrite(blk, offset + written,
3596                                          buf_old + written, pnum, 0);
3597                     }
3598                     if (ret < 0) {
3599                         error_report("Error while writing to COW image: %s",
3600                             strerror(-ret));
3601                         goto out;
3602                     }
3603                 }
3604 
3605                 written += pnum;
3606             }
3607             qemu_progress_print(local_progress, 100);
3608         }
3609     }
3610 
3611     /*
3612      * Change the backing file. All clusters that are different from the old
3613      * backing file are overwritten in the COW file now, so the visible content
3614      * doesn't change when we switch the backing file.
3615      */
3616     if (out_baseimg && *out_baseimg) {
3617         ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
3618     } else {
3619         ret = bdrv_change_backing_file(bs, NULL, NULL);
3620     }
3621 
3622     if (ret == -ENOSPC) {
3623         error_report("Could not change the backing file to '%s': No "
3624                      "space left in the file header", out_baseimg);
3625     } else if (ret < 0) {
3626         error_report("Could not change the backing file to '%s': %s",
3627             out_baseimg, strerror(-ret));
3628     }
3629 
3630     qemu_progress_print(100, 0);
3631     /*
3632      * TODO At this point it is possible to check if any clusters that are
3633      * allocated in the COW file are the same in the backing file. If so, they
3634      * could be dropped from the COW file. Don't do this before switching the
3635      * backing file, in case of a crash this would lead to corruption.
3636      */
3637 out:
3638     qemu_progress_end();
3639     /* Cleanup */
3640     if (!unsafe) {
3641         blk_unref(blk_old_backing);
3642         blk_unref(blk_new_backing);
3643     }
3644     qemu_vfree(buf_old);
3645     qemu_vfree(buf_new);
3646 
3647     blk_unref(blk);
3648     if (ret) {
3649         return 1;
3650     }
3651     return 0;
3652 }
3653 
3654 static int img_resize(int argc, char **argv)
3655 {
3656     Error *err = NULL;
3657     int c, ret, relative;
3658     const char *filename, *fmt, *size;
3659     int64_t n, total_size, current_size, new_size;
3660     bool quiet = false;
3661     BlockBackend *blk = NULL;
3662     PreallocMode prealloc = PREALLOC_MODE_OFF;
3663     QemuOpts *param;
3664 
3665     static QemuOptsList resize_options = {
3666         .name = "resize_options",
3667         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3668         .desc = {
3669             {
3670                 .name = BLOCK_OPT_SIZE,
3671                 .type = QEMU_OPT_SIZE,
3672                 .help = "Virtual disk size"
3673             }, {
3674                 /* end of list */
3675             }
3676         },
3677     };
3678     bool image_opts = false;
3679     bool shrink = false;
3680 
3681     /* Remove size from argv manually so that negative numbers are not treated
3682      * as options by getopt. */
3683     if (argc < 3) {
3684         error_exit("Not enough arguments");
3685         return 1;
3686     }
3687 
3688     size = argv[--argc];
3689 
3690     /* Parse getopt arguments */
3691     fmt = NULL;
3692     for(;;) {
3693         static const struct option long_options[] = {
3694             {"help", no_argument, 0, 'h'},
3695             {"object", required_argument, 0, OPTION_OBJECT},
3696             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3697             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
3698             {"shrink", no_argument, 0, OPTION_SHRINK},
3699             {0, 0, 0, 0}
3700         };
3701         c = getopt_long(argc, argv, ":f:hq",
3702                         long_options, NULL);
3703         if (c == -1) {
3704             break;
3705         }
3706         switch(c) {
3707         case ':':
3708             missing_argument(argv[optind - 1]);
3709             break;
3710         case '?':
3711             unrecognized_option(argv[optind - 1]);
3712             break;
3713         case 'h':
3714             help();
3715             break;
3716         case 'f':
3717             fmt = optarg;
3718             break;
3719         case 'q':
3720             quiet = true;
3721             break;
3722         case OPTION_OBJECT: {
3723             QemuOpts *opts;
3724             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3725                                            optarg, true);
3726             if (!opts) {
3727                 return 1;
3728             }
3729         }   break;
3730         case OPTION_IMAGE_OPTS:
3731             image_opts = true;
3732             break;
3733         case OPTION_PREALLOCATION:
3734             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
3735                                        PREALLOC_MODE__MAX, NULL);
3736             if (prealloc == PREALLOC_MODE__MAX) {
3737                 error_report("Invalid preallocation mode '%s'", optarg);
3738                 return 1;
3739             }
3740             break;
3741         case OPTION_SHRINK:
3742             shrink = true;
3743             break;
3744         }
3745     }
3746     if (optind != argc - 1) {
3747         error_exit("Expecting image file name and size");
3748     }
3749     filename = argv[optind++];
3750 
3751     if (qemu_opts_foreach(&qemu_object_opts,
3752                           user_creatable_add_opts_foreach,
3753                           qemu_img_object_print_help, &error_fatal)) {
3754         return 1;
3755     }
3756 
3757     /* Choose grow, shrink, or absolute resize mode */
3758     switch (size[0]) {
3759     case '+':
3760         relative = 1;
3761         size++;
3762         break;
3763     case '-':
3764         relative = -1;
3765         size++;
3766         break;
3767     default:
3768         relative = 0;
3769         break;
3770     }
3771 
3772     /* Parse size */
3773     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
3774     qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
3775     if (err) {
3776         error_report_err(err);
3777         ret = -1;
3778         qemu_opts_del(param);
3779         goto out;
3780     }
3781     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
3782     qemu_opts_del(param);
3783 
3784     blk = img_open(image_opts, filename, fmt,
3785                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
3786                    false);
3787     if (!blk) {
3788         ret = -1;
3789         goto out;
3790     }
3791 
3792     current_size = blk_getlength(blk);
3793     if (current_size < 0) {
3794         error_report("Failed to inquire current image length: %s",
3795                      strerror(-current_size));
3796         ret = -1;
3797         goto out;
3798     }
3799 
3800     if (relative) {
3801         total_size = current_size + n * relative;
3802     } else {
3803         total_size = n;
3804     }
3805     if (total_size <= 0) {
3806         error_report("New image size must be positive");
3807         ret = -1;
3808         goto out;
3809     }
3810 
3811     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
3812         error_report("Preallocation can only be used for growing images");
3813         ret = -1;
3814         goto out;
3815     }
3816 
3817     if (total_size < current_size && !shrink) {
3818         warn_report("Shrinking an image will delete all data beyond the "
3819                     "shrunken image's end. Before performing such an "
3820                     "operation, make sure there is no important data there.");
3821 
3822         if (g_strcmp0(bdrv_get_format_name(blk_bs(blk)), "raw") != 0) {
3823             error_report(
3824               "Use the --shrink option to perform a shrink operation.");
3825             ret = -1;
3826             goto out;
3827         } else {
3828             warn_report("Using the --shrink option will suppress this message. "
3829                         "Note that future versions of qemu-img may refuse to "
3830                         "shrink images without this option.");
3831         }
3832     }
3833 
3834     ret = blk_truncate(blk, total_size, prealloc, &err);
3835     if (ret < 0) {
3836         error_report_err(err);
3837         goto out;
3838     }
3839 
3840     new_size = blk_getlength(blk);
3841     if (new_size < 0) {
3842         error_report("Failed to verify truncated image length: %s",
3843                      strerror(-new_size));
3844         ret = -1;
3845         goto out;
3846     }
3847 
3848     /* Some block drivers implement a truncation method, but only so
3849      * the user can cause qemu to refresh the image's size from disk.
3850      * The idea is that the user resizes the image outside of qemu and
3851      * then invokes block_resize to inform qemu about it.
3852      * (This includes iscsi and file-posix for device files.)
3853      * Of course, that is not the behavior someone invoking
3854      * qemu-img resize would find useful, so we catch that behavior
3855      * here and tell the user. */
3856     if (new_size != total_size && new_size == current_size) {
3857         error_report("Image was not resized; resizing may not be supported "
3858                      "for this image");
3859         ret = -1;
3860         goto out;
3861     }
3862 
3863     if (new_size != total_size) {
3864         warn_report("Image should have been resized to %" PRIi64
3865                     " bytes, but was resized to %" PRIi64 " bytes",
3866                     total_size, new_size);
3867     }
3868 
3869     qprintf(quiet, "Image resized.\n");
3870 
3871 out:
3872     blk_unref(blk);
3873     if (ret) {
3874         return 1;
3875     }
3876     return 0;
3877 }
3878 
3879 static void amend_status_cb(BlockDriverState *bs,
3880                             int64_t offset, int64_t total_work_size,
3881                             void *opaque)
3882 {
3883     qemu_progress_print(100.f * offset / total_work_size, 0);
3884 }
3885 
3886 static int print_amend_option_help(const char *format)
3887 {
3888     BlockDriver *drv;
3889 
3890     /* Find driver and parse its options */
3891     drv = bdrv_find_format(format);
3892     if (!drv) {
3893         error_report("Unknown file format '%s'", format);
3894         return 1;
3895     }
3896 
3897     if (!drv->bdrv_amend_options) {
3898         error_report("Format driver '%s' does not support option amendment",
3899                      format);
3900         return 1;
3901     }
3902 
3903     /* Every driver supporting amendment must have create_opts */
3904     assert(drv->create_opts);
3905 
3906     printf("Creation options for '%s':\n", format);
3907     qemu_opts_print_help(drv->create_opts, false);
3908     printf("\nNote that not all of these options may be amendable.\n");
3909     return 0;
3910 }
3911 
3912 static int img_amend(int argc, char **argv)
3913 {
3914     Error *err = NULL;
3915     int c, ret = 0;
3916     char *options = NULL;
3917     QemuOptsList *create_opts = NULL;
3918     QemuOpts *opts = NULL;
3919     const char *fmt = NULL, *filename, *cache;
3920     int flags;
3921     bool writethrough;
3922     bool quiet = false, progress = false;
3923     BlockBackend *blk = NULL;
3924     BlockDriverState *bs = NULL;
3925     bool image_opts = false;
3926 
3927     cache = BDRV_DEFAULT_CACHE;
3928     for (;;) {
3929         static const struct option long_options[] = {
3930             {"help", no_argument, 0, 'h'},
3931             {"object", required_argument, 0, OPTION_OBJECT},
3932             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3933             {0, 0, 0, 0}
3934         };
3935         c = getopt_long(argc, argv, ":ho:f:t:pq",
3936                         long_options, NULL);
3937         if (c == -1) {
3938             break;
3939         }
3940 
3941         switch (c) {
3942         case ':':
3943             missing_argument(argv[optind - 1]);
3944             break;
3945         case '?':
3946             unrecognized_option(argv[optind - 1]);
3947             break;
3948         case 'h':
3949             help();
3950             break;
3951         case 'o':
3952             if (!is_valid_option_list(optarg)) {
3953                 error_report("Invalid option list: %s", optarg);
3954                 ret = -1;
3955                 goto out_no_progress;
3956             }
3957             if (!options) {
3958                 options = g_strdup(optarg);
3959             } else {
3960                 char *old_options = options;
3961                 options = g_strdup_printf("%s,%s", options, optarg);
3962                 g_free(old_options);
3963             }
3964             break;
3965         case 'f':
3966             fmt = optarg;
3967             break;
3968         case 't':
3969             cache = optarg;
3970             break;
3971         case 'p':
3972             progress = true;
3973             break;
3974         case 'q':
3975             quiet = true;
3976             break;
3977         case OPTION_OBJECT:
3978             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3979                                            optarg, true);
3980             if (!opts) {
3981                 ret = -1;
3982                 goto out_no_progress;
3983             }
3984             break;
3985         case OPTION_IMAGE_OPTS:
3986             image_opts = true;
3987             break;
3988         }
3989     }
3990 
3991     if (!options) {
3992         error_exit("Must specify options (-o)");
3993     }
3994 
3995     if (qemu_opts_foreach(&qemu_object_opts,
3996                           user_creatable_add_opts_foreach,
3997                           qemu_img_object_print_help, &error_fatal)) {
3998         ret = -1;
3999         goto out_no_progress;
4000     }
4001 
4002     if (quiet) {
4003         progress = false;
4004     }
4005     qemu_progress_init(progress, 1.0);
4006 
4007     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4008     if (fmt && has_help_option(options)) {
4009         /* If a format is explicitly specified (and possibly no filename is
4010          * given), print option help here */
4011         ret = print_amend_option_help(fmt);
4012         goto out;
4013     }
4014 
4015     if (optind != argc - 1) {
4016         error_report("Expecting one image file name");
4017         ret = -1;
4018         goto out;
4019     }
4020 
4021     flags = BDRV_O_RDWR;
4022     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4023     if (ret < 0) {
4024         error_report("Invalid cache option: %s", cache);
4025         goto out;
4026     }
4027 
4028     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4029                    false);
4030     if (!blk) {
4031         ret = -1;
4032         goto out;
4033     }
4034     bs = blk_bs(blk);
4035 
4036     fmt = bs->drv->format_name;
4037 
4038     if (has_help_option(options)) {
4039         /* If the format was auto-detected, print option help here */
4040         ret = print_amend_option_help(fmt);
4041         goto out;
4042     }
4043 
4044     if (!bs->drv->bdrv_amend_options) {
4045         error_report("Format driver '%s' does not support option amendment",
4046                      fmt);
4047         ret = -1;
4048         goto out;
4049     }
4050 
4051     /* Every driver supporting amendment must have create_opts */
4052     assert(bs->drv->create_opts);
4053 
4054     create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
4055     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4056     qemu_opts_do_parse(opts, options, NULL, &err);
4057     if (err) {
4058         error_report_err(err);
4059         ret = -1;
4060         goto out;
4061     }
4062 
4063     /* In case the driver does not call amend_status_cb() */
4064     qemu_progress_print(0.f, 0);
4065     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, &err);
4066     qemu_progress_print(100.f, 0);
4067     if (ret < 0) {
4068         error_report_err(err);
4069         goto out;
4070     }
4071 
4072 out:
4073     qemu_progress_end();
4074 
4075 out_no_progress:
4076     blk_unref(blk);
4077     qemu_opts_del(opts);
4078     qemu_opts_free(create_opts);
4079     g_free(options);
4080 
4081     if (ret) {
4082         return 1;
4083     }
4084     return 0;
4085 }
4086 
4087 typedef struct BenchData {
4088     BlockBackend *blk;
4089     uint64_t image_size;
4090     bool write;
4091     int bufsize;
4092     int step;
4093     int nrreq;
4094     int n;
4095     int flush_interval;
4096     bool drain_on_flush;
4097     uint8_t *buf;
4098     QEMUIOVector *qiov;
4099 
4100     int in_flight;
4101     bool in_flush;
4102     uint64_t offset;
4103 } BenchData;
4104 
/*
 * Completion callback for flushes issued without draining the queue: there
 * is nothing to do on success, a failed flush ends the process.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4112 
4113 static void bench_cb(void *opaque, int ret)
4114 {
4115     BenchData *b = opaque;
4116     BlockAIOCB *acb;
4117 
4118     if (ret < 0) {
4119         error_report("Failed request: %s", strerror(-ret));
4120         exit(EXIT_FAILURE);
4121     }
4122 
4123     if (b->in_flush) {
4124         /* Just finished a flush with drained queue: Start next requests */
4125         assert(b->in_flight == 0);
4126         b->in_flush = false;
4127     } else if (b->in_flight > 0) {
4128         int remaining = b->n - b->in_flight;
4129 
4130         b->n--;
4131         b->in_flight--;
4132 
4133         /* Time for flush? Drain queue if requested, then flush */
4134         if (b->flush_interval && remaining % b->flush_interval == 0) {
4135             if (!b->in_flight || !b->drain_on_flush) {
4136                 BlockCompletionFunc *cb;
4137 
4138                 if (b->drain_on_flush) {
4139                     b->in_flush = true;
4140                     cb = bench_cb;
4141                 } else {
4142                     cb = bench_undrained_flush_cb;
4143                 }
4144 
4145                 acb = blk_aio_flush(b->blk, cb, b);
4146                 if (!acb) {
4147                     error_report("Failed to issue flush request");
4148                     exit(EXIT_FAILURE);
4149                 }
4150             }
4151             if (b->drain_on_flush) {
4152                 return;
4153             }
4154         }
4155     }
4156 
4157     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4158         int64_t offset = b->offset;
4159         /* blk_aio_* might look for completed I/Os and kick bench_cb
4160          * again, so make sure this operation is counted by in_flight
4161          * and b->offset is ready for the next submission.
4162          */
4163         b->in_flight++;
4164         b->offset += b->step;
4165         b->offset %= b->image_size;
4166         if (b->write) {
4167             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4168         } else {
4169             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4170         }
4171         if (!acb) {
4172             error_report("Failed to issue request");
4173             exit(EXIT_FAILURE);
4174         }
4175     }
4176 }
4177 
4178 static int img_bench(int argc, char **argv)
4179 {
4180     int c, ret = 0;
4181     const char *fmt = NULL, *filename;
4182     bool quiet = false;
4183     bool image_opts = false;
4184     bool is_write = false;
4185     int count = 75000;
4186     int depth = 64;
4187     int64_t offset = 0;
4188     size_t bufsize = 4096;
4189     int pattern = 0;
4190     size_t step = 0;
4191     int flush_interval = 0;
4192     bool drain_on_flush = true;
4193     int64_t image_size;
4194     BlockBackend *blk = NULL;
4195     BenchData data = {};
4196     int flags = 0;
4197     bool writethrough = false;
4198     struct timeval t1, t2;
4199     int i;
4200     bool force_share = false;
4201     size_t buf_size;
4202 
4203     for (;;) {
4204         static const struct option long_options[] = {
4205             {"help", no_argument, 0, 'h'},
4206             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4207             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4208             {"pattern", required_argument, 0, OPTION_PATTERN},
4209             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4210             {"force-share", no_argument, 0, 'U'},
4211             {0, 0, 0, 0}
4212         };
4213         c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:wU", long_options, NULL);
4214         if (c == -1) {
4215             break;
4216         }
4217 
4218         switch (c) {
4219         case ':':
4220             missing_argument(argv[optind - 1]);
4221             break;
4222         case '?':
4223             unrecognized_option(argv[optind - 1]);
4224             break;
4225         case 'h':
4226             help();
4227             break;
4228         case 'c':
4229         {
4230             unsigned long res;
4231 
4232             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4233                 error_report("Invalid request count specified");
4234                 return 1;
4235             }
4236             count = res;
4237             break;
4238         }
4239         case 'd':
4240         {
4241             unsigned long res;
4242 
4243             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4244                 error_report("Invalid queue depth specified");
4245                 return 1;
4246             }
4247             depth = res;
4248             break;
4249         }
4250         case 'f':
4251             fmt = optarg;
4252             break;
4253         case 'n':
4254             flags |= BDRV_O_NATIVE_AIO;
4255             break;
4256         case 'o':
4257         {
4258             offset = cvtnum(optarg);
4259             if (offset < 0) {
4260                 error_report("Invalid offset specified");
4261                 return 1;
4262             }
4263             break;
4264         }
4265             break;
4266         case 'q':
4267             quiet = true;
4268             break;
4269         case 's':
4270         {
4271             int64_t sval;
4272 
4273             sval = cvtnum(optarg);
4274             if (sval < 0 || sval > INT_MAX) {
4275                 error_report("Invalid buffer size specified");
4276                 return 1;
4277             }
4278 
4279             bufsize = sval;
4280             break;
4281         }
4282         case 'S':
4283         {
4284             int64_t sval;
4285 
4286             sval = cvtnum(optarg);
4287             if (sval < 0 || sval > INT_MAX) {
4288                 error_report("Invalid step size specified");
4289                 return 1;
4290             }
4291 
4292             step = sval;
4293             break;
4294         }
4295         case 't':
4296             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4297             if (ret < 0) {
4298                 error_report("Invalid cache mode");
4299                 ret = -1;
4300                 goto out;
4301             }
4302             break;
4303         case 'w':
4304             flags |= BDRV_O_RDWR;
4305             is_write = true;
4306             break;
4307         case 'U':
4308             force_share = true;
4309             break;
4310         case OPTION_PATTERN:
4311         {
4312             unsigned long res;
4313 
4314             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4315                 error_report("Invalid pattern byte specified");
4316                 return 1;
4317             }
4318             pattern = res;
4319             break;
4320         }
4321         case OPTION_FLUSH_INTERVAL:
4322         {
4323             unsigned long res;
4324 
4325             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4326                 error_report("Invalid flush interval specified");
4327                 return 1;
4328             }
4329             flush_interval = res;
4330             break;
4331         }
4332         case OPTION_NO_DRAIN:
4333             drain_on_flush = false;
4334             break;
4335         case OPTION_IMAGE_OPTS:
4336             image_opts = true;
4337             break;
4338         }
4339     }
4340 
4341     if (optind != argc - 1) {
4342         error_exit("Expecting one image file name");
4343     }
4344     filename = argv[argc - 1];
4345 
4346     if (!is_write && flush_interval) {
4347         error_report("--flush-interval is only available in write tests");
4348         ret = -1;
4349         goto out;
4350     }
4351     if (flush_interval && flush_interval < depth) {
4352         error_report("Flush interval can't be smaller than depth");
4353         ret = -1;
4354         goto out;
4355     }
4356 
4357     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4358                    force_share);
4359     if (!blk) {
4360         ret = -1;
4361         goto out;
4362     }
4363 
4364     image_size = blk_getlength(blk);
4365     if (image_size < 0) {
4366         ret = image_size;
4367         goto out;
4368     }
4369 
4370     data = (BenchData) {
4371         .blk            = blk,
4372         .image_size     = image_size,
4373         .bufsize        = bufsize,
4374         .step           = step ?: bufsize,
4375         .nrreq          = depth,
4376         .n              = count,
4377         .offset         = offset,
4378         .write          = is_write,
4379         .flush_interval = flush_interval,
4380         .drain_on_flush = drain_on_flush,
4381     };
4382     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4383            "(starting at offset %" PRId64 ", step size %d)\n",
4384            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4385            data.offset, data.step);
4386     if (flush_interval) {
4387         printf("Sending flush every %d requests\n", flush_interval);
4388     }
4389 
4390     buf_size = data.nrreq * data.bufsize;
4391     data.buf = blk_blockalign(blk, buf_size);
4392     memset(data.buf, pattern, data.nrreq * data.bufsize);
4393 
4394     blk_register_buf(blk, data.buf, buf_size);
4395 
4396     data.qiov = g_new(QEMUIOVector, data.nrreq);
4397     for (i = 0; i < data.nrreq; i++) {
4398         qemu_iovec_init(&data.qiov[i], 1);
4399         qemu_iovec_add(&data.qiov[i],
4400                        data.buf + i * data.bufsize, data.bufsize);
4401     }
4402 
4403     gettimeofday(&t1, NULL);
4404     bench_cb(&data, 0);
4405 
4406     while (data.n > 0) {
4407         main_loop_wait(false);
4408     }
4409     gettimeofday(&t2, NULL);
4410 
4411     printf("Run completed in %3.3f seconds.\n",
4412            (t2.tv_sec - t1.tv_sec)
4413            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4414 
4415 out:
4416     if (data.buf) {
4417         blk_unregister_buf(blk, data.buf);
4418     }
4419     qemu_vfree(data.buf);
4420     blk_unref(blk);
4421 
4422     if (ret) {
4423         return 1;
4424     }
4425     return 0;
4426 }
4427 
/* Bits recorded in DdInfo.flags, one per dd operand seen on the command line */
#define C_BS      (1 << 0)
#define C_COUNT   (1 << 1)
#define C_IF      (1 << 2)
#define C_OF      (1 << 3)
#define C_SKIP    (1 << 4)
4433 
/* Settings of a dd run that belong to neither the input nor the output side */
struct DdInfo {
    unsigned int flags;     /* C_* bits of the operands that were given */
    int64_t count;          /* value of the count= operand */
};
4438 
/* Per-side (input or output) state of a dd run */
struct DdIo {
    int bsz;            /* block size, set by the bs= operand */
    char *filename;     /* copy of the if= / of= operand value */
    uint8_t *buf;
    int64_t offset;     /* on the input side set by the skip= operand */
};
4445 
/* One entry of the table that maps a dd operand name to its handler */
struct DdOpts {
    const char *name;       /* operand name, the text before '=' */
    /* Handler called with the text after '='; non-zero return means error */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    unsigned int flag;      /* C_* bit recorded once the handler succeeded */
};
4451 
4452 static int img_dd_bs(const char *arg,
4453                      struct DdIo *in, struct DdIo *out,
4454                      struct DdInfo *dd)
4455 {
4456     int64_t res;
4457 
4458     res = cvtnum(arg);
4459 
4460     if (res <= 0 || res > INT_MAX) {
4461         error_report("invalid number: '%s'", arg);
4462         return 1;
4463     }
4464     in->bsz = out->bsz = res;
4465 
4466     return 0;
4467 }
4468 
4469 static int img_dd_count(const char *arg,
4470                         struct DdIo *in, struct DdIo *out,
4471                         struct DdInfo *dd)
4472 {
4473     dd->count = cvtnum(arg);
4474 
4475     if (dd->count < 0) {
4476         error_report("invalid number: '%s'", arg);
4477         return 1;
4478     }
4479 
4480     return 0;
4481 }
4482 
4483 static int img_dd_if(const char *arg,
4484                      struct DdIo *in, struct DdIo *out,
4485                      struct DdInfo *dd)
4486 {
4487     in->filename = g_strdup(arg);
4488 
4489     return 0;
4490 }
4491 
4492 static int img_dd_of(const char *arg,
4493                      struct DdIo *in, struct DdIo *out,
4494                      struct DdInfo *dd)
4495 {
4496     out->filename = g_strdup(arg);
4497 
4498     return 0;
4499 }
4500 
4501 static int img_dd_skip(const char *arg,
4502                        struct DdIo *in, struct DdIo *out,
4503                        struct DdInfo *dd)
4504 {
4505     in->offset = cvtnum(arg);
4506 
4507     if (in->offset < 0) {
4508         error_report("invalid number: '%s'", arg);
4509         return 1;
4510     }
4511 
4512     return 0;
4513 }
4514 
4515 static int img_dd(int argc, char **argv)
4516 {
4517     int ret = 0;
4518     char *arg = NULL;
4519     char *tmp;
4520     BlockDriver *drv = NULL, *proto_drv = NULL;
4521     BlockBackend *blk1 = NULL, *blk2 = NULL;
4522     QemuOpts *opts = NULL;
4523     QemuOptsList *create_opts = NULL;
4524     Error *local_err = NULL;
4525     bool image_opts = false;
4526     int c, i;
4527     const char *out_fmt = "raw";
4528     const char *fmt = NULL;
4529     int64_t size = 0;
4530     int64_t block_count = 0, out_pos, in_pos;
4531     bool force_share = false;
4532     struct DdInfo dd = {
4533         .flags = 0,
4534         .count = 0,
4535     };
4536     struct DdIo in = {
4537         .bsz = 512, /* Block size is by default 512 bytes */
4538         .filename = NULL,
4539         .buf = NULL,
4540         .offset = 0
4541     };
4542     struct DdIo out = {
4543         .bsz = 512,
4544         .filename = NULL,
4545         .buf = NULL,
4546         .offset = 0
4547     };
4548 
4549     const struct DdOpts options[] = {
4550         { "bs", img_dd_bs, C_BS },
4551         { "count", img_dd_count, C_COUNT },
4552         { "if", img_dd_if, C_IF },
4553         { "of", img_dd_of, C_OF },
4554         { "skip", img_dd_skip, C_SKIP },
4555         { NULL, NULL, 0 }
4556     };
4557     const struct option long_options[] = {
4558         { "help", no_argument, 0, 'h'},
4559         { "object", required_argument, 0, OPTION_OBJECT},
4560         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4561         { "force-share", no_argument, 0, 'U'},
4562         { 0, 0, 0, 0 }
4563     };
4564 
4565     while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
4566         if (c == EOF) {
4567             break;
4568         }
4569         switch (c) {
4570         case 'O':
4571             out_fmt = optarg;
4572             break;
4573         case 'f':
4574             fmt = optarg;
4575             break;
4576         case ':':
4577             missing_argument(argv[optind - 1]);
4578             break;
4579         case '?':
4580             unrecognized_option(argv[optind - 1]);
4581             break;
4582         case 'h':
4583             help();
4584             break;
4585         case 'U':
4586             force_share = true;
4587             break;
4588         case OPTION_OBJECT:
4589             if (!qemu_opts_parse_noisily(&qemu_object_opts, optarg, true)) {
4590                 ret = -1;
4591                 goto out;
4592             }
4593             break;
4594         case OPTION_IMAGE_OPTS:
4595             image_opts = true;
4596             break;
4597         }
4598     }
4599 
4600     for (i = optind; i < argc; i++) {
4601         int j;
4602         arg = g_strdup(argv[i]);
4603 
4604         tmp = strchr(arg, '=');
4605         if (tmp == NULL) {
4606             error_report("unrecognized operand %s", arg);
4607             ret = -1;
4608             goto out;
4609         }
4610 
4611         *tmp++ = '\0';
4612 
4613         for (j = 0; options[j].name != NULL; j++) {
4614             if (!strcmp(arg, options[j].name)) {
4615                 break;
4616             }
4617         }
4618         if (options[j].name == NULL) {
4619             error_report("unrecognized operand %s", arg);
4620             ret = -1;
4621             goto out;
4622         }
4623 
4624         if (options[j].f(tmp, &in, &out, &dd) != 0) {
4625             ret = -1;
4626             goto out;
4627         }
4628         dd.flags |= options[j].flag;
4629         g_free(arg);
4630         arg = NULL;
4631     }
4632 
4633     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
4634         error_report("Must specify both input and output files");
4635         ret = -1;
4636         goto out;
4637     }
4638 
4639     if (qemu_opts_foreach(&qemu_object_opts,
4640                           user_creatable_add_opts_foreach,
4641                           qemu_img_object_print_help, &error_fatal)) {
4642         ret = -1;
4643         goto out;
4644     }
4645 
4646     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
4647                     force_share);
4648 
4649     if (!blk1) {
4650         ret = -1;
4651         goto out;
4652     }
4653 
4654     drv = bdrv_find_format(out_fmt);
4655     if (!drv) {
4656         error_report("Unknown file format");
4657         ret = -1;
4658         goto out;
4659     }
4660     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
4661 
4662     if (!proto_drv) {
4663         error_report_err(local_err);
4664         ret = -1;
4665         goto out;
4666     }
4667     if (!drv->create_opts) {
4668         error_report("Format driver '%s' does not support image creation",
4669                      drv->format_name);
4670         ret = -1;
4671         goto out;
4672     }
4673     if (!proto_drv->create_opts) {
4674         error_report("Protocol driver '%s' does not support image creation",
4675                      proto_drv->format_name);
4676         ret = -1;
4677         goto out;
4678     }
4679     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4680     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
4681 
4682     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4683 
4684     size = blk_getlength(blk1);
4685     if (size < 0) {
4686         error_report("Failed to get size for '%s'", in.filename);
4687         ret = -1;
4688         goto out;
4689     }
4690 
4691     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
4692         dd.count * in.bsz < size) {
4693         size = dd.count * in.bsz;
4694     }
4695 
4696     /* Overflow means the specified offset is beyond input image's size */
4697     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4698                               size < in.bsz * in.offset)) {
4699         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
4700     } else {
4701         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
4702                             size - in.bsz * in.offset, &error_abort);
4703     }
4704 
4705     ret = bdrv_create(drv, out.filename, opts, &local_err);
4706     if (ret < 0) {
4707         error_reportf_err(local_err,
4708                           "%s: error while creating output image: ",
4709                           out.filename);
4710         ret = -1;
4711         goto out;
4712     }
4713 
4714     /* TODO, we can't honour --image-opts for the target,
4715      * since it needs to be given in a format compatible
4716      * with the bdrv_create() call above which does not
4717      * support image-opts style.
4718      */
4719     blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
4720                          false, false, false);
4721 
4722     if (!blk2) {
4723         ret = -1;
4724         goto out;
4725     }
4726 
4727     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4728                               size < in.offset * in.bsz)) {
4729         /* We give a warning if the skip option is bigger than the input
4730          * size and create an empty output disk image (i.e. like dd(1)).
4731          */
4732         error_report("%s: cannot skip to specified offset", in.filename);
4733         in_pos = size;
4734     } else {
4735         in_pos = in.offset * in.bsz;
4736     }
4737 
4738     in.buf = g_new(uint8_t, in.bsz);
4739 
4740     for (out_pos = 0; in_pos < size; block_count++) {
4741         int in_ret, out_ret;
4742 
4743         if (in_pos + in.bsz > size) {
4744             in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
4745         } else {
4746             in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
4747         }
4748         if (in_ret < 0) {
4749             error_report("error while reading from input image file: %s",
4750                          strerror(-in_ret));
4751             ret = -1;
4752             goto out;
4753         }
4754         in_pos += in_ret;
4755 
4756         out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
4757 
4758         if (out_ret < 0) {
4759             error_report("error while writing to output image file: %s",
4760                          strerror(-out_ret));
4761             ret = -1;
4762             goto out;
4763         }
4764         out_pos += out_ret;
4765     }
4766 
4767 out:
4768     g_free(arg);
4769     qemu_opts_del(opts);
4770     qemu_opts_free(create_opts);
4771     blk_unref(blk1);
4772     blk_unref(blk2);
4773     g_free(in.filename);
4774     g_free(out.filename);
4775     g_free(in.buf);
4776     g_free(out.buf);
4777 
4778     if (ret) {
4779         return 1;
4780     }
4781     return 0;
4782 }
4783 
4784 static void dump_json_block_measure_info(BlockMeasureInfo *info)
4785 {
4786     QString *str;
4787     QObject *obj;
4788     Visitor *v = qobject_output_visitor_new(&obj);
4789 
4790     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
4791     visit_complete(v, &obj);
4792     str = qobject_to_json_pretty(obj);
4793     assert(str != NULL);
4794     printf("%s\n", qstring_get_str(str));
4795     qobject_unref(obj);
4796     visit_free(v);
4797     qobject_unref(str);
4798 }
4799 
4800 static int img_measure(int argc, char **argv)
4801 {
4802     static const struct option long_options[] = {
4803         {"help", no_argument, 0, 'h'},
4804         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4805         {"object", required_argument, 0, OPTION_OBJECT},
4806         {"output", required_argument, 0, OPTION_OUTPUT},
4807         {"size", required_argument, 0, OPTION_SIZE},
4808         {"force-share", no_argument, 0, 'U'},
4809         {0, 0, 0, 0}
4810     };
4811     OutputFormat output_format = OFORMAT_HUMAN;
4812     BlockBackend *in_blk = NULL;
4813     BlockDriver *drv;
4814     const char *filename = NULL;
4815     const char *fmt = NULL;
4816     const char *out_fmt = "raw";
4817     char *options = NULL;
4818     char *snapshot_name = NULL;
4819     bool force_share = false;
4820     QemuOpts *opts = NULL;
4821     QemuOpts *object_opts = NULL;
4822     QemuOpts *sn_opts = NULL;
4823     QemuOptsList *create_opts = NULL;
4824     bool image_opts = false;
4825     uint64_t img_size = UINT64_MAX;
4826     BlockMeasureInfo *info = NULL;
4827     Error *local_err = NULL;
4828     int ret = 1;
4829     int c;
4830 
4831     while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
4832                             long_options, NULL)) != -1) {
4833         switch (c) {
4834         case '?':
4835         case 'h':
4836             help();
4837             break;
4838         case 'f':
4839             fmt = optarg;
4840             break;
4841         case 'O':
4842             out_fmt = optarg;
4843             break;
4844         case 'o':
4845             if (!is_valid_option_list(optarg)) {
4846                 error_report("Invalid option list: %s", optarg);
4847                 goto out;
4848             }
4849             if (!options) {
4850                 options = g_strdup(optarg);
4851             } else {
4852                 char *old_options = options;
4853                 options = g_strdup_printf("%s,%s", options, optarg);
4854                 g_free(old_options);
4855             }
4856             break;
4857         case 'l':
4858             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
4859                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
4860                                                   optarg, false);
4861                 if (!sn_opts) {
4862                     error_report("Failed in parsing snapshot param '%s'",
4863                                  optarg);
4864                     goto out;
4865                 }
4866             } else {
4867                 snapshot_name = optarg;
4868             }
4869             break;
4870         case 'U':
4871             force_share = true;
4872             break;
4873         case OPTION_OBJECT:
4874             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
4875                                                   optarg, true);
4876             if (!object_opts) {
4877                 goto out;
4878             }
4879             break;
4880         case OPTION_IMAGE_OPTS:
4881             image_opts = true;
4882             break;
4883         case OPTION_OUTPUT:
4884             if (!strcmp(optarg, "json")) {
4885                 output_format = OFORMAT_JSON;
4886             } else if (!strcmp(optarg, "human")) {
4887                 output_format = OFORMAT_HUMAN;
4888             } else {
4889                 error_report("--output must be used with human or json "
4890                              "as argument.");
4891                 goto out;
4892             }
4893             break;
4894         case OPTION_SIZE:
4895         {
4896             int64_t sval;
4897 
4898             sval = cvtnum(optarg);
4899             if (sval < 0) {
4900                 if (sval == -ERANGE) {
4901                     error_report("Image size must be less than 8 EiB!");
4902                 } else {
4903                     error_report("Invalid image size specified! You may use "
4904                                  "k, M, G, T, P or E suffixes for ");
4905                     error_report("kilobytes, megabytes, gigabytes, terabytes, "
4906                                  "petabytes and exabytes.");
4907                 }
4908                 goto out;
4909             }
4910             img_size = (uint64_t)sval;
4911         }
4912         break;
4913         }
4914     }
4915 
4916     if (qemu_opts_foreach(&qemu_object_opts,
4917                           user_creatable_add_opts_foreach,
4918                           qemu_img_object_print_help, &error_fatal)) {
4919         goto out;
4920     }
4921 
4922     if (argc - optind > 1) {
4923         error_report("At most one filename argument is allowed.");
4924         goto out;
4925     } else if (argc - optind == 1) {
4926         filename = argv[optind];
4927     }
4928 
4929     if (!filename &&
4930         (object_opts || image_opts || fmt || snapshot_name || sn_opts)) {
4931         error_report("--object, --image-opts, -f, and -l "
4932                      "require a filename argument.");
4933         goto out;
4934     }
4935     if (filename && img_size != UINT64_MAX) {
4936         error_report("--size N cannot be used together with a filename.");
4937         goto out;
4938     }
4939     if (!filename && img_size == UINT64_MAX) {
4940         error_report("Either --size N or one filename must be specified.");
4941         goto out;
4942     }
4943 
4944     if (filename) {
4945         in_blk = img_open(image_opts, filename, fmt, 0,
4946                           false, false, force_share);
4947         if (!in_blk) {
4948             goto out;
4949         }
4950 
4951         if (sn_opts) {
4952             bdrv_snapshot_load_tmp(blk_bs(in_blk),
4953                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
4954                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
4955                     &local_err);
4956         } else if (snapshot_name != NULL) {
4957             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
4958                     snapshot_name, &local_err);
4959         }
4960         if (local_err) {
4961             error_reportf_err(local_err, "Failed to load snapshot: ");
4962             goto out;
4963         }
4964     }
4965 
4966     drv = bdrv_find_format(out_fmt);
4967     if (!drv) {
4968         error_report("Unknown file format '%s'", out_fmt);
4969         goto out;
4970     }
4971     if (!drv->create_opts) {
4972         error_report("Format driver '%s' does not support image creation",
4973                      drv->format_name);
4974         goto out;
4975     }
4976 
4977     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4978     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
4979     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4980     if (options) {
4981         qemu_opts_do_parse(opts, options, NULL, &local_err);
4982         if (local_err) {
4983             error_report_err(local_err);
4984             error_report("Invalid options for file format '%s'", out_fmt);
4985             goto out;
4986         }
4987     }
4988     if (img_size != UINT64_MAX) {
4989         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
4990     }
4991 
4992     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
4993     if (local_err) {
4994         error_report_err(local_err);
4995         goto out;
4996     }
4997 
4998     if (output_format == OFORMAT_HUMAN) {
4999         printf("required size: %" PRIu64 "\n", info->required);
5000         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5001     } else {
5002         dump_json_block_measure_info(info);
5003     }
5004 
5005     ret = 0;
5006 
5007 out:
5008     qapi_free_BlockMeasureInfo(info);
5009     qemu_opts_del(object_opts);
5010     qemu_opts_del(opts);
5011     qemu_opts_del(sn_opts);
5012     qemu_opts_free(create_opts);
5013     g_free(options);
5014     blk_unref(in_blk);
5015     return ret;
5016 }
5017 
5018 static const img_cmd_t img_cmds[] = {
5019 #define DEF(option, callback, arg_string)        \
5020     { option, callback },
5021 #include "qemu-img-cmds.h"
5022 #undef DEF
5023     { NULL, NULL, },
5024 };
5025 
5026 int main(int argc, char **argv)
5027 {
5028     const img_cmd_t *cmd;
5029     const char *cmdname;
5030     Error *local_error = NULL;
5031     char *trace_file = NULL;
5032     int c;
5033     static const struct option long_options[] = {
5034         {"help", no_argument, 0, 'h'},
5035         {"version", no_argument, 0, 'V'},
5036         {"trace", required_argument, NULL, 'T'},
5037         {0, 0, 0, 0}
5038     };
5039 
5040 #ifdef CONFIG_POSIX
5041     signal(SIGPIPE, SIG_IGN);
5042 #endif
5043 
5044     error_init(argv[0]);
5045     module_call_init(MODULE_INIT_TRACE);
5046     qemu_init_exec_dir(argv[0]);
5047 
5048     if (qemu_init_main_loop(&local_error)) {
5049         error_report_err(local_error);
5050         exit(EXIT_FAILURE);
5051     }
5052 
5053     qcrypto_init(&error_fatal);
5054 
5055     module_call_init(MODULE_INIT_QOM);
5056     bdrv_init();
5057     if (argc < 2) {
5058         error_exit("Not enough arguments");
5059     }
5060 
5061     qemu_add_opts(&qemu_object_opts);
5062     qemu_add_opts(&qemu_source_opts);
5063     qemu_add_opts(&qemu_trace_opts);
5064 
5065     while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5066         switch (c) {
5067         case ':':
5068             missing_argument(argv[optind - 1]);
5069             return 0;
5070         case '?':
5071             unrecognized_option(argv[optind - 1]);
5072             return 0;
5073         case 'h':
5074             help();
5075             return 0;
5076         case 'V':
5077             printf(QEMU_IMG_VERSION);
5078             return 0;
5079         case 'T':
5080             g_free(trace_file);
5081             trace_file = trace_opt_parse(optarg);
5082             break;
5083         }
5084     }
5085 
5086     cmdname = argv[optind];
5087 
5088     /* reset getopt_long scanning */
5089     argc -= optind;
5090     if (argc < 1) {
5091         return 0;
5092     }
5093     argv += optind;
5094     qemu_reset_optind();
5095 
5096     if (!trace_init_backends()) {
5097         exit(1);
5098     }
5099     trace_init_file(trace_file);
5100     qemu_set_log(LOG_TRACE);
5101 
5102     /* find the command */
5103     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5104         if (!strcmp(cmdname, cmd->name)) {
5105             return cmd->handler(argc, argv);
5106         }
5107     }
5108 
5109     /* not found */
5110     error_exit("Command not found: %s", cmdname);
5111 }
5112