xref: /openbmc/qemu/qemu-img.c (revision 795c40b8)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "qemu-version.h"
26 #include "qapi/error.h"
27 #include "qapi-visit.h"
28 #include "qapi/qobject-output-visitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "qapi/qmp/qjson.h"
31 #include "qapi/qmp/qbool.h"
32 #include "qemu/cutils.h"
33 #include "qemu/config-file.h"
34 #include "qemu/option.h"
35 #include "qemu/error-report.h"
36 #include "qemu/log.h"
37 #include "qom/object_interfaces.h"
38 #include "sysemu/sysemu.h"
39 #include "sysemu/block-backend.h"
40 #include "block/block_int.h"
41 #include "block/blockjob.h"
42 #include "block/qapi.h"
43 #include "crypto/init.h"
44 #include "trace/control.h"
45 #include <getopt.h>
46 
47 #define QEMU_IMG_VERSION "qemu-img version " QEMU_VERSION QEMU_PKGVERSION \
48                           "\n" QEMU_COPYRIGHT "\n"
49 
/*
 * Pairs a command name with the function that implements it.
 * NOTE(review): presumably one entry exists per qemu-img subcommand; the
 * table that uses this type is not visible in this part of the file.
 */
typedef struct img_cmd_t {
    /* Name used to select the command. */
    const char *name;
    /* Entry point, called with an argc/argv pair; returns an int status. */
    int (*handler)(int argc, char **argv);
} img_cmd_t;
54 
/*
 * Codes returned by getopt_long() for options that have no single-letter
 * form.  Numbering starts at 256 and continues sequentially, so none of
 * the codes can be mistaken for a short option character.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN,   /* 257 */
    OPTION_OBJECT,          /* 258 */
    OPTION_IMAGE_OPTS,      /* 259 */
    OPTION_PATTERN,         /* 260 */
    OPTION_FLUSH_INTERVAL,  /* 261 */
    OPTION_NO_DRAIN,        /* 262 */
};
64 
/* How a command renders its result; selected with '--output'. */
typedef enum OutputFormat {
    OFORMAT_JSON,   /* machine-readable JSON */
    OFORMAT_HUMAN,  /* plain text for people */
} OutputFormat;
69 
70 /* Default to cache=writeback as data integrity is not important for qemu-img */
71 #define BDRV_DEFAULT_CACHE "writeback"
72 
/*
 * Iteration callback handed to bdrv_iterate_format(): writes one format
 * name to stdout, preceded by a single space.  @opaque is not used.
 */
static void format_print(void *opaque, const char *name)
{
    (void)opaque;
    printf(" %s", name);
}
77 
78 static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
79 {
80     va_list ap;
81 
82     error_printf("qemu-img: ");
83 
84     va_start(ap, fmt);
85     error_vprintf(fmt, ap);
86     va_end(ap);
87 
88     error_printf("\nTry 'qemu-img --help' for more information\n");
89     exit(EXIT_FAILURE);
90 }
91 
92 static void QEMU_NORETURN missing_argument(const char *option)
93 {
94     error_exit("missing argument for option '%s'", option);
95 }
96 
97 static void QEMU_NORETURN unrecognized_option(const char *option)
98 {
99     error_exit("unrecognized option '%s'", option);
100 }
101 
102 /* Please keep in synch with qemu-img.texi */
103 static void QEMU_NORETURN help(void)
104 {
105     const char *help_msg =
106            QEMU_IMG_VERSION
107            "usage: qemu-img [standard options] command [command options]\n"
108            "QEMU disk image utility\n"
109            "\n"
110            "    '-h', '--help'       display this help and exit\n"
111            "    '-V', '--version'    output version information and exit\n"
112            "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
113            "                         specify tracing options\n"
114            "\n"
115            "Command syntax:\n"
116 #define DEF(option, callback, arg_string)        \
117            "  " arg_string "\n"
118 #include "qemu-img-cmds.h"
119 #undef DEF
120 #undef GEN_DOCS
121            "\n"
122            "Command parameters:\n"
123            "  'filename' is a disk image filename\n"
124            "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
125            "    manual page for a description of the object properties. The most common\n"
126            "    object type is a 'secret', which is used to supply passwords and/or\n"
127            "    encryption keys.\n"
128            "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
129            "  'cache' is the cache mode used to write the output disk image, the valid\n"
130            "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
131            "    'directsync' and 'unsafe' (default for convert)\n"
132            "  'src_cache' is the cache mode used to read input disk images, the valid\n"
133            "    options are the same as for the 'cache' option\n"
134            "  'size' is the disk image size in bytes. Optional suffixes\n"
135            "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
136            "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
137            "    supported. 'b' is ignored.\n"
138            "  'output_filename' is the destination disk image filename\n"
139            "  'output_fmt' is the destination format\n"
140            "  'options' is a comma separated list of format specific options in a\n"
141            "    name=value format. Use -o ? for an overview of the options supported by the\n"
142            "    used format\n"
143            "  'snapshot_param' is param used for internal snapshot, format\n"
144            "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
145            "    '[ID_OR_NAME]'\n"
146            "  'snapshot_id_or_name' is deprecated, use 'snapshot_param'\n"
147            "    instead\n"
148            "  '-c' indicates that target image must be compressed (qcow format only)\n"
149            "  '-u' enables unsafe rebasing. It is assumed that old and new backing file\n"
150            "       match exactly. The image doesn't need a working backing file before\n"
151            "       rebasing in this case (useful for renaming the backing file)\n"
152            "  '-h' with or without a command shows this help and lists the supported formats\n"
153            "  '-p' show progress of command (only certain commands)\n"
154            "  '-q' use Quiet mode - do not print any output (except errors)\n"
155            "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
156            "       contain only zeros for qemu-img to create a sparse image during\n"
157            "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
158            "       unallocated or zero sectors, and the destination image will always be\n"
159            "       fully allocated\n"
160            "  '--output' takes the format in which the output must be done (human or json)\n"
161            "  '-n' skips the target volume creation (useful if the volume is created\n"
162            "       prior to running qemu-img)\n"
163            "\n"
164            "Parameters to check subcommand:\n"
165            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
166            "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
167            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
168            "       hiding corruption that has already occurred.\n"
169            "\n"
170            "Parameters to convert subcommand:\n"
171            "  '-m' specifies how many coroutines work in parallel during the convert\n"
172            "       process (defaults to 8)\n"
173            "  '-W' allow to write to the target out of order rather than sequential\n"
174            "\n"
175            "Parameters to snapshot subcommand:\n"
176            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
177            "  '-a' applies a snapshot (revert disk to saved state)\n"
178            "  '-c' creates a snapshot\n"
179            "  '-d' deletes a snapshot\n"
180            "  '-l' lists all snapshots in the given image\n"
181            "\n"
182            "Parameters to compare subcommand:\n"
183            "  '-f' first image format\n"
184            "  '-F' second image format\n"
185            "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
186            "\n"
187            "Parameters to dd subcommand:\n"
188            "  'bs=BYTES' read and write up to BYTES bytes at a time "
189            "(default: 512)\n"
190            "  'count=N' copy only N input blocks\n"
191            "  'if=FILE' read from FILE\n"
192            "  'of=FILE' write to FILE\n"
193            "  'skip=N' skip N bs-sized blocks at the start of input\n";
194 
195     printf("%s\nSupported formats:", help_msg);
196     bdrv_iterate_format(format_print, NULL);
197     printf("\n");
198     exit(EXIT_SUCCESS);
199 }
200 
201 static QemuOptsList qemu_object_opts = {
202     .name = "object",
203     .implied_opt_name = "qom-type",
204     .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
205     .desc = {
206         { }
207     },
208 };
209 
210 static QemuOptsList qemu_source_opts = {
211     .name = "source",
212     .implied_opt_name = "file",
213     .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
214     .desc = {
215         { }
216     },
217 };
218 
219 static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
220 {
221     int ret = 0;
222     if (!quiet) {
223         va_list args;
224         va_start(args, fmt);
225         ret = vprintf(fmt, args);
226         va_end(args);
227     }
228     return ret;
229 }
230 
231 
232 static int print_block_option_help(const char *filename, const char *fmt)
233 {
234     BlockDriver *drv, *proto_drv;
235     QemuOptsList *create_opts = NULL;
236     Error *local_err = NULL;
237 
238     /* Find driver and parse its options */
239     drv = bdrv_find_format(fmt);
240     if (!drv) {
241         error_report("Unknown file format '%s'", fmt);
242         return 1;
243     }
244 
245     create_opts = qemu_opts_append(create_opts, drv->create_opts);
246     if (filename) {
247         proto_drv = bdrv_find_protocol(filename, true, &local_err);
248         if (!proto_drv) {
249             error_report_err(local_err);
250             qemu_opts_free(create_opts);
251             return 1;
252         }
253         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
254     }
255 
256     qemu_opts_print_help(create_opts);
257     qemu_opts_free(create_opts);
258     return 0;
259 }
260 
261 
262 static int img_open_password(BlockBackend *blk, const char *filename,
263                              int flags, bool quiet)
264 {
265     BlockDriverState *bs;
266     char password[256];
267 
268     bs = blk_bs(blk);
269     if (bdrv_is_encrypted(bs) && bdrv_key_required(bs) &&
270         !(flags & BDRV_O_NO_IO)) {
271         qprintf(quiet, "Disk image '%s' is encrypted.\n", filename);
272         if (qemu_read_password(password, sizeof(password)) < 0) {
273             error_report("No password given");
274             return -1;
275         }
276         if (bdrv_set_key(bs, password) < 0) {
277             error_report("invalid password");
278             return -1;
279         }
280     }
281     return 0;
282 }
283 
284 
285 static BlockBackend *img_open_opts(const char *optstr,
286                                    QemuOpts *opts, int flags, bool writethrough,
287                                    bool quiet, bool force_share)
288 {
289     QDict *options;
290     Error *local_err = NULL;
291     BlockBackend *blk;
292     options = qemu_opts_to_qdict(opts, NULL);
293     if (force_share) {
294         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
295             && !qdict_get_bool(options, BDRV_OPT_FORCE_SHARE)) {
296             error_report("--force-share/-U conflicts with image options");
297             return NULL;
298         }
299         qdict_put(options, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true));
300     }
301     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
302     if (!blk) {
303         error_reportf_err(local_err, "Could not open '%s': ", optstr);
304         return NULL;
305     }
306     blk_set_enable_write_cache(blk, !writethrough);
307 
308     if (img_open_password(blk, optstr, flags, quiet) < 0) {
309         blk_unref(blk);
310         return NULL;
311     }
312     return blk;
313 }
314 
315 static BlockBackend *img_open_file(const char *filename,
316                                    const char *fmt, int flags,
317                                    bool writethrough, bool quiet,
318                                    bool force_share)
319 {
320     BlockBackend *blk;
321     Error *local_err = NULL;
322     QDict *options = qdict_new();
323 
324     if (fmt) {
325         qdict_put_str(options, "driver", fmt);
326     }
327 
328     if (force_share) {
329         qdict_put(options, BDRV_OPT_FORCE_SHARE, qbool_from_bool(true));
330     }
331     blk = blk_new_open(filename, NULL, options, flags, &local_err);
332     if (!blk) {
333         error_reportf_err(local_err, "Could not open '%s': ", filename);
334         return NULL;
335     }
336     blk_set_enable_write_cache(blk, !writethrough);
337 
338     if (img_open_password(blk, filename, flags, quiet) < 0) {
339         blk_unref(blk);
340         return NULL;
341     }
342     return blk;
343 }
344 
345 
346 static BlockBackend *img_open(bool image_opts,
347                               const char *filename,
348                               const char *fmt, int flags, bool writethrough,
349                               bool quiet, bool force_share)
350 {
351     BlockBackend *blk;
352     if (image_opts) {
353         QemuOpts *opts;
354         if (fmt) {
355             error_report("--image-opts and --format are mutually exclusive");
356             return NULL;
357         }
358         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
359                                        filename, true);
360         if (!opts) {
361             return NULL;
362         }
363         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
364                             force_share);
365     } else {
366         blk = img_open_file(filename, fmt, flags, writethrough, quiet,
367                             force_share);
368     }
369     return blk;
370 }
371 
372 
373 static int add_old_style_options(const char *fmt, QemuOpts *opts,
374                                  const char *base_filename,
375                                  const char *base_fmt)
376 {
377     Error *err = NULL;
378 
379     if (base_filename) {
380         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
381         if (err) {
382             error_report("Backing file not supported for file format '%s'",
383                          fmt);
384             error_free(err);
385             return -1;
386         }
387     }
388     if (base_fmt) {
389         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
390         if (err) {
391             error_report("Backing file format not supported for file "
392                          "format '%s'", fmt);
393             error_free(err);
394             return -1;
395         }
396     }
397     return 0;
398 }
399 
400 static int64_t cvtnum(const char *s)
401 {
402     int err;
403     uint64_t value;
404 
405     err = qemu_strtosz(s, NULL, &value);
406     if (err < 0) {
407         return err;
408     }
409     if (value > INT64_MAX) {
410         return -ERANGE;
411     }
412     return value;
413 }
414 
415 static int img_create(int argc, char **argv)
416 {
417     int c;
418     uint64_t img_size = -1;
419     const char *fmt = "raw";
420     const char *base_fmt = NULL;
421     const char *filename;
422     const char *base_filename = NULL;
423     char *options = NULL;
424     Error *local_err = NULL;
425     bool quiet = false;
426 
427     for(;;) {
428         static const struct option long_options[] = {
429             {"help", no_argument, 0, 'h'},
430             {"object", required_argument, 0, OPTION_OBJECT},
431             {0, 0, 0, 0}
432         };
433         c = getopt_long(argc, argv, ":F:b:f:he6o:q",
434                         long_options, NULL);
435         if (c == -1) {
436             break;
437         }
438         switch(c) {
439         case ':':
440             missing_argument(argv[optind - 1]);
441             break;
442         case '?':
443             unrecognized_option(argv[optind - 1]);
444             break;
445         case 'h':
446             help();
447             break;
448         case 'F':
449             base_fmt = optarg;
450             break;
451         case 'b':
452             base_filename = optarg;
453             break;
454         case 'f':
455             fmt = optarg;
456             break;
457         case 'e':
458             error_report("option -e is deprecated, please use \'-o "
459                   "encryption\' instead!");
460             goto fail;
461         case '6':
462             error_report("option -6 is deprecated, please use \'-o "
463                   "compat6\' instead!");
464             goto fail;
465         case 'o':
466             if (!is_valid_option_list(optarg)) {
467                 error_report("Invalid option list: %s", optarg);
468                 goto fail;
469             }
470             if (!options) {
471                 options = g_strdup(optarg);
472             } else {
473                 char *old_options = options;
474                 options = g_strdup_printf("%s,%s", options, optarg);
475                 g_free(old_options);
476             }
477             break;
478         case 'q':
479             quiet = true;
480             break;
481         case OPTION_OBJECT: {
482             QemuOpts *opts;
483             opts = qemu_opts_parse_noisily(&qemu_object_opts,
484                                            optarg, true);
485             if (!opts) {
486                 goto fail;
487             }
488         }   break;
489         }
490     }
491 
492     /* Get the filename */
493     filename = (optind < argc) ? argv[optind] : NULL;
494     if (options && has_help_option(options)) {
495         g_free(options);
496         return print_block_option_help(filename, fmt);
497     }
498 
499     if (optind >= argc) {
500         error_exit("Expecting image file name");
501     }
502     optind++;
503 
504     if (qemu_opts_foreach(&qemu_object_opts,
505                           user_creatable_add_opts_foreach,
506                           NULL, NULL)) {
507         goto fail;
508     }
509 
510     /* Get image size, if specified */
511     if (optind < argc) {
512         int64_t sval;
513 
514         sval = cvtnum(argv[optind++]);
515         if (sval < 0) {
516             if (sval == -ERANGE) {
517                 error_report("Image size must be less than 8 EiB!");
518             } else {
519                 error_report("Invalid image size specified! You may use k, M, "
520                       "G, T, P or E suffixes for ");
521                 error_report("kilobytes, megabytes, gigabytes, terabytes, "
522                              "petabytes and exabytes.");
523             }
524             goto fail;
525         }
526         img_size = (uint64_t)sval;
527     }
528     if (optind != argc) {
529         error_exit("Unexpected argument: %s", argv[optind]);
530     }
531 
532     bdrv_img_create(filename, fmt, base_filename, base_fmt,
533                     options, img_size, 0, quiet, &local_err);
534     if (local_err) {
535         error_reportf_err(local_err, "%s: ", filename);
536         goto fail;
537     }
538 
539     g_free(options);
540     return 0;
541 
542 fail:
543     g_free(options);
544     return 1;
545 }
546 
547 static void dump_json_image_check(ImageCheck *check, bool quiet)
548 {
549     QString *str;
550     QObject *obj;
551     Visitor *v = qobject_output_visitor_new(&obj);
552 
553     visit_type_ImageCheck(v, NULL, &check, &error_abort);
554     visit_complete(v, &obj);
555     str = qobject_to_json_pretty(obj);
556     assert(str != NULL);
557     qprintf(quiet, "%s\n", qstring_get_str(str));
558     qobject_decref(obj);
559     visit_free(v);
560     QDECREF(str);
561 }
562 
563 static void dump_human_image_check(ImageCheck *check, bool quiet)
564 {
565     if (!(check->corruptions || check->leaks || check->check_errors)) {
566         qprintf(quiet, "No errors were found on the image.\n");
567     } else {
568         if (check->corruptions) {
569             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
570                     "Data may be corrupted, or further writes to the image "
571                     "may corrupt it.\n",
572                     check->corruptions);
573         }
574 
575         if (check->leaks) {
576             qprintf(quiet,
577                     "\n%" PRId64 " leaked clusters were found on the image.\n"
578                     "This means waste of disk space, but no harm to data.\n",
579                     check->leaks);
580         }
581 
582         if (check->check_errors) {
583             qprintf(quiet,
584                     "\n%" PRId64
585                     " internal errors have occurred during the check.\n",
586                     check->check_errors);
587         }
588     }
589 
590     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
591         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
592                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
593                 check->allocated_clusters, check->total_clusters,
594                 check->allocated_clusters * 100.0 / check->total_clusters,
595                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
596                 check->compressed_clusters * 100.0 /
597                 check->allocated_clusters);
598     }
599 
600     if (check->image_end_offset) {
601         qprintf(quiet,
602                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
603     }
604 }
605 
606 static int collect_image_check(BlockDriverState *bs,
607                    ImageCheck *check,
608                    const char *filename,
609                    const char *fmt,
610                    int fix)
611 {
612     int ret;
613     BdrvCheckResult result;
614 
615     ret = bdrv_check(bs, &result, fix);
616     if (ret < 0) {
617         return ret;
618     }
619 
620     check->filename                 = g_strdup(filename);
621     check->format                   = g_strdup(bdrv_get_format_name(bs));
622     check->check_errors             = result.check_errors;
623     check->corruptions              = result.corruptions;
624     check->has_corruptions          = result.corruptions != 0;
625     check->leaks                    = result.leaks;
626     check->has_leaks                = result.leaks != 0;
627     check->corruptions_fixed        = result.corruptions_fixed;
628     check->has_corruptions_fixed    = result.corruptions != 0;
629     check->leaks_fixed              = result.leaks_fixed;
630     check->has_leaks_fixed          = result.leaks != 0;
631     check->image_end_offset         = result.image_end_offset;
632     check->has_image_end_offset     = result.image_end_offset != 0;
633     check->total_clusters           = result.bfi.total_clusters;
634     check->has_total_clusters       = result.bfi.total_clusters != 0;
635     check->allocated_clusters       = result.bfi.allocated_clusters;
636     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
637     check->fragmented_clusters      = result.bfi.fragmented_clusters;
638     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
639     check->compressed_clusters      = result.bfi.compressed_clusters;
640     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
641 
642     return 0;
643 }
644 
645 /*
646  * Checks an image for consistency. Exit codes:
647  *
648  *  0 - Check completed, image is good
649  *  1 - Check not completed because of internal errors
650  *  2 - Check completed, image is corrupted
651  *  3 - Check completed, image has leaked clusters, but is good otherwise
652  * 63 - Checks are not supported by the image format
653  */
654 static int img_check(int argc, char **argv)
655 {
656     int c, ret;
657     OutputFormat output_format = OFORMAT_HUMAN;
658     const char *filename, *fmt, *output, *cache;
659     BlockBackend *blk;
660     BlockDriverState *bs;
661     int fix = 0;
662     int flags = BDRV_O_CHECK;
663     bool writethrough;
664     ImageCheck *check;
665     bool quiet = false;
666     bool image_opts = false;
667     bool force_share = false;
668 
669     fmt = NULL;
670     output = NULL;
671     cache = BDRV_DEFAULT_CACHE;
672 
673     for(;;) {
674         int option_index = 0;
675         static const struct option long_options[] = {
676             {"help", no_argument, 0, 'h'},
677             {"format", required_argument, 0, 'f'},
678             {"repair", required_argument, 0, 'r'},
679             {"output", required_argument, 0, OPTION_OUTPUT},
680             {"object", required_argument, 0, OPTION_OBJECT},
681             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
682             {"force-share", no_argument, 0, 'U'},
683             {0, 0, 0, 0}
684         };
685         c = getopt_long(argc, argv, ":hf:r:T:qU",
686                         long_options, &option_index);
687         if (c == -1) {
688             break;
689         }
690         switch(c) {
691         case ':':
692             missing_argument(argv[optind - 1]);
693             break;
694         case '?':
695             unrecognized_option(argv[optind - 1]);
696             break;
697         case 'h':
698             help();
699             break;
700         case 'f':
701             fmt = optarg;
702             break;
703         case 'r':
704             flags |= BDRV_O_RDWR;
705 
706             if (!strcmp(optarg, "leaks")) {
707                 fix = BDRV_FIX_LEAKS;
708             } else if (!strcmp(optarg, "all")) {
709                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
710             } else {
711                 error_exit("Unknown option value for -r "
712                            "(expecting 'leaks' or 'all'): %s", optarg);
713             }
714             break;
715         case OPTION_OUTPUT:
716             output = optarg;
717             break;
718         case 'T':
719             cache = optarg;
720             break;
721         case 'q':
722             quiet = true;
723             break;
724         case 'U':
725             force_share = true;
726             break;
727         case OPTION_OBJECT: {
728             QemuOpts *opts;
729             opts = qemu_opts_parse_noisily(&qemu_object_opts,
730                                            optarg, true);
731             if (!opts) {
732                 return 1;
733             }
734         }   break;
735         case OPTION_IMAGE_OPTS:
736             image_opts = true;
737             break;
738         }
739     }
740     if (optind != argc - 1) {
741         error_exit("Expecting one image file name");
742     }
743     filename = argv[optind++];
744 
745     if (output && !strcmp(output, "json")) {
746         output_format = OFORMAT_JSON;
747     } else if (output && !strcmp(output, "human")) {
748         output_format = OFORMAT_HUMAN;
749     } else if (output) {
750         error_report("--output must be used with human or json as argument.");
751         return 1;
752     }
753 
754     if (qemu_opts_foreach(&qemu_object_opts,
755                           user_creatable_add_opts_foreach,
756                           NULL, NULL)) {
757         return 1;
758     }
759 
760     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
761     if (ret < 0) {
762         error_report("Invalid source cache option: %s", cache);
763         return 1;
764     }
765 
766     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
767                    force_share);
768     if (!blk) {
769         return 1;
770     }
771     bs = blk_bs(blk);
772 
773     check = g_new0(ImageCheck, 1);
774     ret = collect_image_check(bs, check, filename, fmt, fix);
775 
776     if (ret == -ENOTSUP) {
777         error_report("This image format does not support checks");
778         ret = 63;
779         goto fail;
780     }
781 
782     if (check->corruptions_fixed || check->leaks_fixed) {
783         int corruptions_fixed, leaks_fixed;
784 
785         leaks_fixed         = check->leaks_fixed;
786         corruptions_fixed   = check->corruptions_fixed;
787 
788         if (output_format == OFORMAT_HUMAN) {
789             qprintf(quiet,
790                     "The following inconsistencies were found and repaired:\n\n"
791                     "    %" PRId64 " leaked clusters\n"
792                     "    %" PRId64 " corruptions\n\n"
793                     "Double checking the fixed image now...\n",
794                     check->leaks_fixed,
795                     check->corruptions_fixed);
796         }
797 
798         ret = collect_image_check(bs, check, filename, fmt, 0);
799 
800         check->leaks_fixed          = leaks_fixed;
801         check->corruptions_fixed    = corruptions_fixed;
802     }
803 
804     if (!ret) {
805         switch (output_format) {
806         case OFORMAT_HUMAN:
807             dump_human_image_check(check, quiet);
808             break;
809         case OFORMAT_JSON:
810             dump_json_image_check(check, quiet);
811             break;
812         }
813     }
814 
815     if (ret || check->check_errors) {
816         if (ret) {
817             error_report("Check failed: %s", strerror(-ret));
818         } else {
819             error_report("Check failed");
820         }
821         ret = 1;
822         goto fail;
823     }
824 
825     if (check->corruptions) {
826         ret = 2;
827     } else if (check->leaks) {
828         ret = 3;
829     } else {
830         ret = 0;
831     }
832 
833 fail:
834     qapi_free_ImageCheck(check);
835     blk_unref(blk);
836     return ret;
837 }
838 
839 typedef struct CommonBlockJobCBInfo {
840     BlockDriverState *bs;
841     Error **errp;
842 } CommonBlockJobCBInfo;
843 
844 static void common_block_job_cb(void *opaque, int ret)
845 {
846     CommonBlockJobCBInfo *cbi = opaque;
847 
848     if (ret < 0) {
849         error_setg_errno(cbi->errp, -ret, "Block job failed");
850     }
851 }
852 
853 static void run_block_job(BlockJob *job, Error **errp)
854 {
855     AioContext *aio_context = blk_get_aio_context(job->blk);
856 
857     /* FIXME In error cases, the job simply goes away and we access a dangling
858      * pointer below. */
859     aio_context_acquire(aio_context);
860     do {
861         aio_poll(aio_context, true);
862         qemu_progress_print(job->len ?
863                             ((float)job->offset / job->len * 100.f) : 0.0f, 0);
864     } while (!job->ready);
865 
866     block_job_complete_sync(job, errp);
867     aio_context_release(aio_context);
868 
869     /* A block job may finish instantaneously without publishing any progress,
870      * so just signal completion here */
871     qemu_progress_print(100.f, 0);
872 }
873 
874 static int img_commit(int argc, char **argv)
875 {
876     int c, ret, flags;
877     const char *filename, *fmt, *cache, *base;
878     BlockBackend *blk;
879     BlockDriverState *bs, *base_bs;
880     BlockJob *job;
881     bool progress = false, quiet = false, drop = false;
882     bool writethrough;
883     Error *local_err = NULL;
884     CommonBlockJobCBInfo cbi;
885     bool image_opts = false;
886     AioContext *aio_context;
887 
888     fmt = NULL;
889     cache = BDRV_DEFAULT_CACHE;
890     base = NULL;
891     for(;;) {
892         static const struct option long_options[] = {
893             {"help", no_argument, 0, 'h'},
894             {"object", required_argument, 0, OPTION_OBJECT},
895             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
896             {0, 0, 0, 0}
897         };
898         c = getopt_long(argc, argv, ":f:ht:b:dpq",
899                         long_options, NULL);
900         if (c == -1) {
901             break;
902         }
903         switch(c) {
904         case ':':
905             missing_argument(argv[optind - 1]);
906             break;
907         case '?':
908             unrecognized_option(argv[optind - 1]);
909             break;
910         case 'h':
911             help();
912             break;
913         case 'f':
914             fmt = optarg;
915             break;
916         case 't':
917             cache = optarg;
918             break;
919         case 'b':
920             base = optarg;
921             /* -b implies -d */
922             drop = true;
923             break;
924         case 'd':
925             drop = true;
926             break;
927         case 'p':
928             progress = true;
929             break;
930         case 'q':
931             quiet = true;
932             break;
933         case OPTION_OBJECT: {
934             QemuOpts *opts;
935             opts = qemu_opts_parse_noisily(&qemu_object_opts,
936                                            optarg, true);
937             if (!opts) {
938                 return 1;
939             }
940         }   break;
941         case OPTION_IMAGE_OPTS:
942             image_opts = true;
943             break;
944         }
945     }
946 
947     /* Progress is not shown in Quiet mode */
948     if (quiet) {
949         progress = false;
950     }
951 
952     if (optind != argc - 1) {
953         error_exit("Expecting one image file name");
954     }
955     filename = argv[optind++];
956 
957     if (qemu_opts_foreach(&qemu_object_opts,
958                           user_creatable_add_opts_foreach,
959                           NULL, NULL)) {
960         return 1;
961     }
962 
963     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
964     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
965     if (ret < 0) {
966         error_report("Invalid cache option: %s", cache);
967         return 1;
968     }
969 
970     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
971                    false);
972     if (!blk) {
973         return 1;
974     }
975     bs = blk_bs(blk);
976 
977     qemu_progress_init(progress, 1.f);
978     qemu_progress_print(0.f, 100);
979 
980     if (base) {
981         base_bs = bdrv_find_backing_image(bs, base);
982         if (!base_bs) {
983             error_setg(&local_err,
984                        "Did not find '%s' in the backing chain of '%s'",
985                        base, filename);
986             goto done;
987         }
988     } else {
989         /* This is different from QMP, which by default uses the deepest file in
990          * the backing chain (i.e., the very base); however, the traditional
991          * behavior of qemu-img commit is using the immediate backing file. */
992         base_bs = backing_bs(bs);
993         if (!base_bs) {
994             error_setg(&local_err, "Image does not have a backing file");
995             goto done;
996         }
997     }
998 
999     cbi = (CommonBlockJobCBInfo){
1000         .errp = &local_err,
1001         .bs   = bs,
1002     };
1003 
1004     aio_context = bdrv_get_aio_context(bs);
1005     aio_context_acquire(aio_context);
1006     commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0,
1007                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1008                         &cbi, false, &local_err);
1009     aio_context_release(aio_context);
1010     if (local_err) {
1011         goto done;
1012     }
1013 
1014     /* When the block job completes, the BlockBackend reference will point to
1015      * the old backing file. In order to avoid that the top image is already
1016      * deleted, so we can still empty it afterwards, increment the reference
1017      * counter here preemptively. */
1018     if (!drop) {
1019         bdrv_ref(bs);
1020     }
1021 
1022     job = block_job_get("commit");
1023     run_block_job(job, &local_err);
1024     if (local_err) {
1025         goto unref_backing;
1026     }
1027 
1028     if (!drop && bs->drv->bdrv_make_empty) {
1029         ret = bs->drv->bdrv_make_empty(bs);
1030         if (ret) {
1031             error_setg_errno(&local_err, -ret, "Could not empty %s",
1032                              filename);
1033             goto unref_backing;
1034         }
1035     }
1036 
1037 unref_backing:
1038     if (!drop) {
1039         bdrv_unref(bs);
1040     }
1041 
1042 done:
1043     qemu_progress_end();
1044 
1045     blk_unref(blk);
1046 
1047     if (local_err) {
1048         error_report_err(local_err);
1049         return 1;
1050     }
1051 
1052     qprintf(quiet, "Image committed.\n");
1053     return 0;
1054 }
1055 
/*
 * Classifies the first 512-byte sector of 'buf' and measures how far that
 * classification extends.
 *
 * Returns non-zero iff the first sector contains at least one non-NUL byte.
 * 'pnum' receives the number of consecutive sectors, starting with the first
 * one, that share the first sector's zero/non-zero state (at most 'n').
 * For n <= 0, 'pnum' is set to 0 and 0 is returned.
 */
static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum)
{
    bool first_is_zero;
    int run;

    if (n <= 0) {
        *pnum = 0;
        return 0;
    }

    first_is_zero = buffer_is_zero(buf, 512);
    for (run = 1; run < n; run++) {
        const uint8_t *sector = buf + run * 512;

        if (buffer_is_zero(sector, 512) != first_is_zero) {
            break;
        }
    }

    *pnum = run;
    return !first_is_zero;
}
1082 
1083 /*
1084  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1085  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1086  * breaking up write requests for only small sparse areas.
1087  */
1088 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1089     int min)
1090 {
1091     int ret;
1092     int num_checked, num_used;
1093 
1094     if (n < min) {
1095         min = n;
1096     }
1097 
1098     ret = is_allocated_sectors(buf, n, pnum);
1099     if (!ret) {
1100         return ret;
1101     }
1102 
1103     num_used = *pnum;
1104     buf += BDRV_SECTOR_SIZE * *pnum;
1105     n -= *pnum;
1106     num_checked = num_used;
1107 
1108     while (n > 0) {
1109         ret = is_allocated_sectors(buf, n, pnum);
1110 
1111         buf += BDRV_SECTOR_SIZE * *pnum;
1112         n -= *pnum;
1113         num_checked += *pnum;
1114         if (ret) {
1115             num_used = num_checked;
1116         } else if (*pnum >= min) {
1117             break;
1118         }
1119     }
1120 
1121     *pnum = num_used;
1122     return 1;
1123 }
1124 
/*
 * Compares two buffers in units of 512-byte sectors.
 *
 * Returns 0 if the first sector of both buffers matches, non-zero otherwise.
 * 'pnum' receives the number of consecutive sectors, starting with the first
 * one, that have the same comparison result as the first sector (at most
 * 'n'). For n <= 0, 'pnum' is set to 0 and 0 is returned.
 */
static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n,
    int *pnum)
{
    bool first_differs;
    int run;

    if (n <= 0) {
        *pnum = 0;
        return 0;
    }

    first_differs = memcmp(buf1, buf2, 512) != 0;
    for (run = 1; run < n; run++) {
        bool differs;

        buf1 += 512;
        buf2 += 512;
        differs = memcmp(buf1, buf2, 512) != 0;
        if (differs != first_differs) {
            break;
        }
    }

    *pnum = run;
    return first_differs;
}
1156 
/* Size in bytes (2 MiB) of the I/O buffers used when comparing images */
#define IO_BUF_SIZE (2 << 20)
1158 
1159 static int64_t sectors_to_bytes(int64_t sectors)
1160 {
1161     return sectors << BDRV_SECTOR_BITS;
1162 }
1163 
1164 static int64_t sectors_to_process(int64_t total, int64_t from)
1165 {
1166     return MIN(total - from, IO_BUF_SIZE >> BDRV_SECTOR_BITS);
1167 }
1168 
1169 /*
1170  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1171  *
1172  * Returns 0 in case sectors are filled with 0, 1 if sectors contain non-zero
1173  * data and negative value on error.
1174  *
1175  * @param blk:  BlockBackend for the image
1176  * @param sect_num: Number of first sector to check
1177  * @param sect_count: Number of sectors to check
1178  * @param filename: Name of disk file we are checking (logging purpose)
1179  * @param buffer: Allocated buffer for storing read data
1180  * @param quiet: Flag for quiet mode
1181  */
1182 static int check_empty_sectors(BlockBackend *blk, int64_t sect_num,
1183                                int sect_count, const char *filename,
1184                                uint8_t *buffer, bool quiet)
1185 {
1186     int pnum, ret = 0;
1187     ret = blk_pread(blk, sect_num << BDRV_SECTOR_BITS, buffer,
1188                     sect_count << BDRV_SECTOR_BITS);
1189     if (ret < 0) {
1190         error_report("Error while reading offset %" PRId64 " of %s: %s",
1191                      sectors_to_bytes(sect_num), filename, strerror(-ret));
1192         return ret;
1193     }
1194     ret = is_allocated_sectors(buffer, sect_count, &pnum);
1195     if (ret || pnum != sect_count) {
1196         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1197                 sectors_to_bytes(ret ? sect_num : sect_num + pnum));
1198         return 1;
1199     }
1200 
1201     return 0;
1202 }
1203 
1204 /*
1205  * Compares two images. Exit codes:
1206  *
1207  * 0 - Images are identical
1208  * 1 - Images differ
1209  * >1 - Error occurred
1210  */
1211 static int img_compare(int argc, char **argv)
1212 {
1213     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1214     BlockBackend *blk1, *blk2;
1215     BlockDriverState *bs1, *bs2;
1216     int64_t total_sectors1, total_sectors2;
1217     uint8_t *buf1 = NULL, *buf2 = NULL;
1218     int pnum1, pnum2;
1219     int allocated1, allocated2;
1220     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1221     bool progress = false, quiet = false, strict = false;
1222     int flags;
1223     bool writethrough;
1224     int64_t total_sectors;
1225     int64_t sector_num = 0;
1226     int64_t nb_sectors;
1227     int c, pnum;
1228     uint64_t progress_base;
1229     bool image_opts = false;
1230     bool force_share = false;
1231 
1232     cache = BDRV_DEFAULT_CACHE;
1233     for (;;) {
1234         static const struct option long_options[] = {
1235             {"help", no_argument, 0, 'h'},
1236             {"object", required_argument, 0, OPTION_OBJECT},
1237             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1238             {"force-share", no_argument, 0, 'U'},
1239             {0, 0, 0, 0}
1240         };
1241         c = getopt_long(argc, argv, ":hf:F:T:pqsU",
1242                         long_options, NULL);
1243         if (c == -1) {
1244             break;
1245         }
1246         switch (c) {
1247         case ':':
1248             missing_argument(argv[optind - 1]);
1249             break;
1250         case '?':
1251             unrecognized_option(argv[optind - 1]);
1252             break;
1253         case 'h':
1254             help();
1255             break;
1256         case 'f':
1257             fmt1 = optarg;
1258             break;
1259         case 'F':
1260             fmt2 = optarg;
1261             break;
1262         case 'T':
1263             cache = optarg;
1264             break;
1265         case 'p':
1266             progress = true;
1267             break;
1268         case 'q':
1269             quiet = true;
1270             break;
1271         case 's':
1272             strict = true;
1273             break;
1274         case 'U':
1275             force_share = true;
1276             break;
1277         case OPTION_OBJECT: {
1278             QemuOpts *opts;
1279             opts = qemu_opts_parse_noisily(&qemu_object_opts,
1280                                            optarg, true);
1281             if (!opts) {
1282                 ret = 2;
1283                 goto out4;
1284             }
1285         }   break;
1286         case OPTION_IMAGE_OPTS:
1287             image_opts = true;
1288             break;
1289         }
1290     }
1291 
1292     /* Progress is not shown in Quiet mode */
1293     if (quiet) {
1294         progress = false;
1295     }
1296 
1297 
1298     if (optind != argc - 2) {
1299         error_exit("Expecting two image file names");
1300     }
1301     filename1 = argv[optind++];
1302     filename2 = argv[optind++];
1303 
1304     if (qemu_opts_foreach(&qemu_object_opts,
1305                           user_creatable_add_opts_foreach,
1306                           NULL, NULL)) {
1307         ret = 2;
1308         goto out4;
1309     }
1310 
1311     /* Initialize before goto out */
1312     qemu_progress_init(progress, 2.0);
1313 
1314     flags = 0;
1315     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1316     if (ret < 0) {
1317         error_report("Invalid source cache option: %s", cache);
1318         ret = 2;
1319         goto out3;
1320     }
1321 
1322     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
1323                     force_share);
1324     if (!blk1) {
1325         ret = 2;
1326         goto out3;
1327     }
1328 
1329     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
1330                     force_share);
1331     if (!blk2) {
1332         ret = 2;
1333         goto out2;
1334     }
1335     bs1 = blk_bs(blk1);
1336     bs2 = blk_bs(blk2);
1337 
1338     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1339     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1340     total_sectors1 = blk_nb_sectors(blk1);
1341     if (total_sectors1 < 0) {
1342         error_report("Can't get size of %s: %s",
1343                      filename1, strerror(-total_sectors1));
1344         ret = 4;
1345         goto out;
1346     }
1347     total_sectors2 = blk_nb_sectors(blk2);
1348     if (total_sectors2 < 0) {
1349         error_report("Can't get size of %s: %s",
1350                      filename2, strerror(-total_sectors2));
1351         ret = 4;
1352         goto out;
1353     }
1354     total_sectors = MIN(total_sectors1, total_sectors2);
1355     progress_base = MAX(total_sectors1, total_sectors2);
1356 
1357     qemu_progress_print(0, 100);
1358 
1359     if (strict && total_sectors1 != total_sectors2) {
1360         ret = 1;
1361         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1362         goto out;
1363     }
1364 
1365     for (;;) {
1366         int64_t status1, status2;
1367         BlockDriverState *file;
1368 
1369         nb_sectors = sectors_to_process(total_sectors, sector_num);
1370         if (nb_sectors <= 0) {
1371             break;
1372         }
1373         status1 = bdrv_get_block_status_above(bs1, NULL, sector_num,
1374                                               total_sectors1 - sector_num,
1375                                               &pnum1, &file);
1376         if (status1 < 0) {
1377             ret = 3;
1378             error_report("Sector allocation test failed for %s", filename1);
1379             goto out;
1380         }
1381         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1382 
1383         status2 = bdrv_get_block_status_above(bs2, NULL, sector_num,
1384                                               total_sectors2 - sector_num,
1385                                               &pnum2, &file);
1386         if (status2 < 0) {
1387             ret = 3;
1388             error_report("Sector allocation test failed for %s", filename2);
1389             goto out;
1390         }
1391         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1392         if (pnum1) {
1393             nb_sectors = MIN(nb_sectors, pnum1);
1394         }
1395         if (pnum2) {
1396             nb_sectors = MIN(nb_sectors, pnum2);
1397         }
1398 
1399         if (strict) {
1400             if ((status1 & ~BDRV_BLOCK_OFFSET_MASK) !=
1401                 (status2 & ~BDRV_BLOCK_OFFSET_MASK)) {
1402                 ret = 1;
1403                 qprintf(quiet, "Strict mode: Offset %" PRId64
1404                         " block status mismatch!\n",
1405                         sectors_to_bytes(sector_num));
1406                 goto out;
1407             }
1408         }
1409         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1410             nb_sectors = MIN(pnum1, pnum2);
1411         } else if (allocated1 == allocated2) {
1412             if (allocated1) {
1413                 ret = blk_pread(blk1, sector_num << BDRV_SECTOR_BITS, buf1,
1414                                 nb_sectors << BDRV_SECTOR_BITS);
1415                 if (ret < 0) {
1416                     error_report("Error while reading offset %" PRId64 " of %s:"
1417                                  " %s", sectors_to_bytes(sector_num), filename1,
1418                                  strerror(-ret));
1419                     ret = 4;
1420                     goto out;
1421                 }
1422                 ret = blk_pread(blk2, sector_num << BDRV_SECTOR_BITS, buf2,
1423                                 nb_sectors << BDRV_SECTOR_BITS);
1424                 if (ret < 0) {
1425                     error_report("Error while reading offset %" PRId64
1426                                  " of %s: %s", sectors_to_bytes(sector_num),
1427                                  filename2, strerror(-ret));
1428                     ret = 4;
1429                     goto out;
1430                 }
1431                 ret = compare_sectors(buf1, buf2, nb_sectors, &pnum);
1432                 if (ret || pnum != nb_sectors) {
1433                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1434                             sectors_to_bytes(
1435                                 ret ? sector_num : sector_num + pnum));
1436                     ret = 1;
1437                     goto out;
1438                 }
1439             }
1440         } else {
1441 
1442             if (allocated1) {
1443                 ret = check_empty_sectors(blk1, sector_num, nb_sectors,
1444                                           filename1, buf1, quiet);
1445             } else {
1446                 ret = check_empty_sectors(blk2, sector_num, nb_sectors,
1447                                           filename2, buf1, quiet);
1448             }
1449             if (ret) {
1450                 if (ret < 0) {
1451                     error_report("Error while reading offset %" PRId64 ": %s",
1452                                  sectors_to_bytes(sector_num), strerror(-ret));
1453                     ret = 4;
1454                 }
1455                 goto out;
1456             }
1457         }
1458         sector_num += nb_sectors;
1459         qemu_progress_print(((float) nb_sectors / progress_base)*100, 100);
1460     }
1461 
1462     if (total_sectors1 != total_sectors2) {
1463         BlockBackend *blk_over;
1464         int64_t total_sectors_over;
1465         const char *filename_over;
1466 
1467         qprintf(quiet, "Warning: Image size mismatch!\n");
1468         if (total_sectors1 > total_sectors2) {
1469             total_sectors_over = total_sectors1;
1470             blk_over = blk1;
1471             filename_over = filename1;
1472         } else {
1473             total_sectors_over = total_sectors2;
1474             blk_over = blk2;
1475             filename_over = filename2;
1476         }
1477 
1478         for (;;) {
1479             nb_sectors = sectors_to_process(total_sectors_over, sector_num);
1480             if (nb_sectors <= 0) {
1481                 break;
1482             }
1483             ret = bdrv_is_allocated_above(blk_bs(blk_over), NULL, sector_num,
1484                                           nb_sectors, &pnum);
1485             if (ret < 0) {
1486                 ret = 3;
1487                 error_report("Sector allocation test failed for %s",
1488                              filename_over);
1489                 goto out;
1490 
1491             }
1492             nb_sectors = pnum;
1493             if (ret) {
1494                 ret = check_empty_sectors(blk_over, sector_num, nb_sectors,
1495                                           filename_over, buf1, quiet);
1496                 if (ret) {
1497                     if (ret < 0) {
1498                         error_report("Error while reading offset %" PRId64
1499                                      " of %s: %s", sectors_to_bytes(sector_num),
1500                                      filename_over, strerror(-ret));
1501                         ret = 4;
1502                     }
1503                     goto out;
1504                 }
1505             }
1506             sector_num += nb_sectors;
1507             qemu_progress_print(((float) nb_sectors / progress_base)*100, 100);
1508         }
1509     }
1510 
1511     qprintf(quiet, "Images are identical.\n");
1512     ret = 0;
1513 
1514 out:
1515     qemu_vfree(buf1);
1516     qemu_vfree(buf2);
1517     blk_unref(blk2);
1518 out2:
1519     blk_unref(blk1);
1520 out3:
1521     qemu_progress_end();
1522 out4:
1523     return ret;
1524 }
1525 
/* How "qemu-img convert" treats a range of source sectors */
enum ImgConvertBlockStatus {
    /* Range has to be read from the source and written to the target */
    BLK_DATA,
    /* Range reads as zeroes */
    BLK_ZERO,
    /* Range is left unallocated so that the target's backing file shows */
    BLK_BACKING_FILE,
};
1531 
1532 #define MAX_COROUTINES 16
1533 
1534 typedef struct ImgConvertState {
1535     BlockBackend **src;
1536     int64_t *src_sectors;
1537     int src_num;
1538     int64_t total_sectors;
1539     int64_t allocated_sectors;
1540     int64_t allocated_done;
1541     int64_t sector_num;
1542     int64_t wr_offs;
1543     enum ImgConvertBlockStatus status;
1544     int64_t sector_next_status;
1545     BlockBackend *target;
1546     bool has_zero_init;
1547     bool compressed;
1548     bool target_has_backing;
1549     bool wr_in_order;
1550     int min_sparse;
1551     size_t cluster_sectors;
1552     size_t buf_sectors;
1553     long num_coroutines;
1554     int running_coroutines;
1555     Coroutine *co[MAX_COROUTINES];
1556     int64_t wait_sector_num[MAX_COROUTINES];
1557     CoMutex lock;
1558     int ret;
1559 } ImgConvertState;
1560 
1561 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1562                                 int *src_cur, int64_t *src_cur_offset)
1563 {
1564     *src_cur = 0;
1565     *src_cur_offset = 0;
1566     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1567         *src_cur_offset += s->src_sectors[*src_cur];
1568         (*src_cur)++;
1569         assert(*src_cur < s->src_num);
1570     }
1571 }
1572 
1573 static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1574 {
1575     int64_t ret, src_cur_offset;
1576     int n, src_cur;
1577 
1578     convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1579 
1580     assert(s->total_sectors > sector_num);
1581     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1582 
1583     if (s->sector_next_status <= sector_num) {
1584         BlockDriverState *file;
1585         if (s->target_has_backing) {
1586             ret = bdrv_get_block_status(blk_bs(s->src[src_cur]),
1587                                         sector_num - src_cur_offset,
1588                                         n, &n, &file);
1589         } else {
1590             ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL,
1591                                               sector_num - src_cur_offset,
1592                                               n, &n, &file);
1593         }
1594         if (ret < 0) {
1595             return ret;
1596         }
1597 
1598         if (ret & BDRV_BLOCK_ZERO) {
1599             s->status = BLK_ZERO;
1600         } else if (ret & BDRV_BLOCK_DATA) {
1601             s->status = BLK_DATA;
1602         } else {
1603             s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
1604         }
1605 
1606         s->sector_next_status = sector_num + n;
1607     }
1608 
1609     n = MIN(n, s->sector_next_status - sector_num);
1610     if (s->status == BLK_DATA) {
1611         n = MIN(n, s->buf_sectors);
1612     }
1613 
1614     /* We need to write complete clusters for compressed images, so if an
1615      * unallocated area is shorter than that, we must consider the whole
1616      * cluster allocated. */
1617     if (s->compressed) {
1618         if (n < s->cluster_sectors) {
1619             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1620             s->status = BLK_DATA;
1621         } else {
1622             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1623         }
1624     }
1625 
1626     return n;
1627 }
1628 
1629 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1630                                         int nb_sectors, uint8_t *buf)
1631 {
1632     int n, ret;
1633     QEMUIOVector qiov;
1634     struct iovec iov;
1635 
1636     assert(nb_sectors <= s->buf_sectors);
1637     while (nb_sectors > 0) {
1638         BlockBackend *blk;
1639         int src_cur;
1640         int64_t bs_sectors, src_cur_offset;
1641 
1642         /* In the case of compression with multiple source files, we can get a
1643          * nb_sectors that spreads into the next part. So we must be able to
1644          * read across multiple BDSes for one convert_read() call. */
1645         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1646         blk = s->src[src_cur];
1647         bs_sectors = s->src_sectors[src_cur];
1648 
1649         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1650         iov.iov_base = buf;
1651         iov.iov_len = n << BDRV_SECTOR_BITS;
1652         qemu_iovec_init_external(&qiov, &iov, 1);
1653 
1654         ret = blk_co_preadv(
1655                 blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS,
1656                 n << BDRV_SECTOR_BITS, &qiov, 0);
1657         if (ret < 0) {
1658             return ret;
1659         }
1660 
1661         sector_num += n;
1662         nb_sectors -= n;
1663         buf += n * BDRV_SECTOR_SIZE;
1664     }
1665 
1666     return 0;
1667 }
1668 
1669 
1670 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1671                                          int nb_sectors, uint8_t *buf,
1672                                          enum ImgConvertBlockStatus status)
1673 {
1674     int ret;
1675     QEMUIOVector qiov;
1676     struct iovec iov;
1677 
1678     while (nb_sectors > 0) {
1679         int n = nb_sectors;
1680         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1681 
1682         switch (status) {
1683         case BLK_BACKING_FILE:
1684             /* If we have a backing file, leave clusters unallocated that are
1685              * unallocated in the source image, so that the backing file is
1686              * visible at the respective offset. */
1687             assert(s->target_has_backing);
1688             break;
1689 
1690         case BLK_DATA:
1691             /* If we're told to keep the target fully allocated (-S 0) or there
1692              * is real non-zero data, we must write it. Otherwise we can treat
1693              * it as zero sectors.
1694              * Compressed clusters need to be written as a whole, so in that
1695              * case we can only save the write if the buffer is completely
1696              * zeroed. */
1697             if (!s->min_sparse ||
1698                 (!s->compressed &&
1699                  is_allocated_sectors_min(buf, n, &n, s->min_sparse)) ||
1700                 (s->compressed &&
1701                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1702             {
1703                 iov.iov_base = buf;
1704                 iov.iov_len = n << BDRV_SECTOR_BITS;
1705                 qemu_iovec_init_external(&qiov, &iov, 1);
1706 
1707                 ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS,
1708                                      n << BDRV_SECTOR_BITS, &qiov, flags);
1709                 if (ret < 0) {
1710                     return ret;
1711                 }
1712                 break;
1713             }
1714             /* fall-through */
1715 
1716         case BLK_ZERO:
1717             if (s->has_zero_init) {
1718                 assert(!s->target_has_backing);
1719                 break;
1720             }
1721             ret = blk_co_pwrite_zeroes(s->target,
1722                                        sector_num << BDRV_SECTOR_BITS,
1723                                        n << BDRV_SECTOR_BITS, 0);
1724             if (ret < 0) {
1725                 return ret;
1726             }
1727             break;
1728         }
1729 
1730         sector_num += n;
1731         nb_sectors -= n;
1732         buf += n * BDRV_SECTOR_SIZE;
1733     }
1734 
1735     return 0;
1736 }
1737 
1738 static void coroutine_fn convert_co_do_copy(void *opaque)
1739 {
1740     ImgConvertState *s = opaque;
1741     uint8_t *buf = NULL;
1742     int ret, i;
1743     int index = -1;
1744 
1745     for (i = 0; i < s->num_coroutines; i++) {
1746         if (s->co[i] == qemu_coroutine_self()) {
1747             index = i;
1748             break;
1749         }
1750     }
1751     assert(index >= 0);
1752 
1753     s->running_coroutines++;
1754     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1755 
1756     while (1) {
1757         int n;
1758         int64_t sector_num;
1759         enum ImgConvertBlockStatus status;
1760 
1761         qemu_co_mutex_lock(&s->lock);
1762         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1763             qemu_co_mutex_unlock(&s->lock);
1764             break;
1765         }
1766         n = convert_iteration_sectors(s, s->sector_num);
1767         if (n < 0) {
1768             qemu_co_mutex_unlock(&s->lock);
1769             s->ret = n;
1770             break;
1771         }
1772         /* save current sector and allocation status to local variables */
1773         sector_num = s->sector_num;
1774         status = s->status;
1775         if (!s->min_sparse && s->status == BLK_ZERO) {
1776             n = MIN(n, s->buf_sectors);
1777         }
1778         /* increment global sector counter so that other coroutines can
1779          * already continue reading beyond this request */
1780         s->sector_num += n;
1781         qemu_co_mutex_unlock(&s->lock);
1782 
1783         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
1784             s->allocated_done += n;
1785             qemu_progress_print(100.0 * s->allocated_done /
1786                                         s->allocated_sectors, 0);
1787         }
1788 
1789         if (status == BLK_DATA) {
1790             ret = convert_co_read(s, sector_num, n, buf);
1791             if (ret < 0) {
1792                 error_report("error while reading sector %" PRId64
1793                              ": %s", sector_num, strerror(-ret));
1794                 s->ret = ret;
1795             }
1796         } else if (!s->min_sparse && status == BLK_ZERO) {
1797             status = BLK_DATA;
1798             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
1799         }
1800 
1801         if (s->wr_in_order) {
1802             /* keep writes in order */
1803             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
1804                 s->wait_sector_num[index] = sector_num;
1805                 qemu_coroutine_yield();
1806             }
1807             s->wait_sector_num[index] = -1;
1808         }
1809 
1810         if (s->ret == -EINPROGRESS) {
1811             ret = convert_co_write(s, sector_num, n, buf, status);
1812             if (ret < 0) {
1813                 error_report("error while writing sector %" PRId64
1814                              ": %s", sector_num, strerror(-ret));
1815                 s->ret = ret;
1816             }
1817         }
1818 
1819         if (s->wr_in_order) {
1820             /* reenter the coroutine that might have waited
1821              * for this write to complete */
1822             s->wr_offs = sector_num + n;
1823             for (i = 0; i < s->num_coroutines; i++) {
1824                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
1825                     /*
1826                      * A -> B -> A cannot occur because A has
1827                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
1828                      * B will never enter A during this time window.
1829                      */
1830                     qemu_coroutine_enter(s->co[i]);
1831                     break;
1832                 }
1833             }
1834         }
1835     }
1836 
1837     qemu_vfree(buf);
1838     s->co[index] = NULL;
1839     s->running_coroutines--;
1840     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
1841         /* the convert job finished successfully */
1842         s->ret = 0;
1843     }
1844 }
1845 
1846 static int convert_do_copy(ImgConvertState *s)
1847 {
1848     int ret, i, n;
1849     int64_t sector_num = 0;
1850 
1851     /* Check whether we have zero initialisation or can get it efficiently */
1852     s->has_zero_init = s->min_sparse && !s->target_has_backing
1853                      ? bdrv_has_zero_init(blk_bs(s->target))
1854                      : false;
1855 
1856     if (!s->has_zero_init && !s->target_has_backing &&
1857         bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
1858     {
1859         ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP);
1860         if (ret == 0) {
1861             s->has_zero_init = true;
1862         }
1863     }
1864 
1865     /* Allocate buffer for copied data. For compressed images, only one cluster
1866      * can be copied at a time. */
1867     if (s->compressed) {
1868         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
1869             error_report("invalid cluster size");
1870             return -EINVAL;
1871         }
1872         s->buf_sectors = s->cluster_sectors;
1873     }
1874 
1875     while (sector_num < s->total_sectors) {
1876         n = convert_iteration_sectors(s, sector_num);
1877         if (n < 0) {
1878             return n;
1879         }
1880         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
1881         {
1882             s->allocated_sectors += n;
1883         }
1884         sector_num += n;
1885     }
1886 
1887     /* Do the copy */
1888     s->sector_next_status = 0;
1889     s->ret = -EINPROGRESS;
1890 
1891     qemu_co_mutex_init(&s->lock);
1892     for (i = 0; i < s->num_coroutines; i++) {
1893         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
1894         s->wait_sector_num[i] = -1;
1895         qemu_coroutine_enter(s->co[i]);
1896     }
1897 
1898     while (s->running_coroutines) {
1899         main_loop_wait(false);
1900     }
1901 
1902     if (s->compressed && !s->ret) {
1903         /* signal EOF to align */
1904         ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
1905         if (ret < 0) {
1906             return ret;
1907         }
1908     }
1909 
1910     return s->ret;
1911 }
1912 
1913 static int img_convert(int argc, char **argv)
1914 {
1915     int c, bs_i, flags, src_flags = 0;
1916     const char *fmt = NULL, *out_fmt = "raw", *cache = "unsafe",
1917                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
1918                *out_filename, *out_baseimg_param, *snapshot_name = NULL;
1919     BlockDriver *drv, *proto_drv;
1920     BlockDriverInfo bdi;
1921     BlockDriverState *out_bs;
1922     QemuOpts *opts = NULL, *sn_opts = NULL;
1923     QemuOptsList *create_opts = NULL;
1924     char *options = NULL;
1925     Error *local_err = NULL;
1926     bool writethrough, src_writethrough, quiet = false, image_opts = false,
1927          skip_create = false, progress = false;
1928     int64_t ret = -EINVAL;
1929     bool force_share = false;
1930 
1931     ImgConvertState s = (ImgConvertState) {
1932         /* Need at least 4k of zeros for sparse detection */
1933         .min_sparse         = 8,
1934         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
1935         .wr_in_order        = true,
1936         .num_coroutines     = 8,
1937     };
1938 
1939     for(;;) {
1940         static const struct option long_options[] = {
1941             {"help", no_argument, 0, 'h'},
1942             {"object", required_argument, 0, OPTION_OBJECT},
1943             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1944             {"force-share", no_argument, 0, 'U'},
1945             {0, 0, 0, 0}
1946         };
1947         c = getopt_long(argc, argv, ":hf:O:B:ce6o:s:l:S:pt:T:qnm:WU",
1948                         long_options, NULL);
1949         if (c == -1) {
1950             break;
1951         }
1952         switch(c) {
1953         case ':':
1954             missing_argument(argv[optind - 1]);
1955             break;
1956         case '?':
1957             unrecognized_option(argv[optind - 1]);
1958             break;
1959         case 'h':
1960             help();
1961             break;
1962         case 'f':
1963             fmt = optarg;
1964             break;
1965         case 'O':
1966             out_fmt = optarg;
1967             break;
1968         case 'B':
1969             out_baseimg = optarg;
1970             break;
1971         case 'c':
1972             s.compressed = true;
1973             break;
1974         case 'e':
1975             error_report("option -e is deprecated, please use \'-o "
1976                   "encryption\' instead!");
1977             goto fail_getopt;
1978         case '6':
1979             error_report("option -6 is deprecated, please use \'-o "
1980                   "compat6\' instead!");
1981             goto fail_getopt;
1982         case 'o':
1983             if (!is_valid_option_list(optarg)) {
1984                 error_report("Invalid option list: %s", optarg);
1985                 goto fail_getopt;
1986             }
1987             if (!options) {
1988                 options = g_strdup(optarg);
1989             } else {
1990                 char *old_options = options;
1991                 options = g_strdup_printf("%s,%s", options, optarg);
1992                 g_free(old_options);
1993             }
1994             break;
1995         case 's':
1996             snapshot_name = optarg;
1997             break;
1998         case 'l':
1999             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2000                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2001                                                   optarg, false);
2002                 if (!sn_opts) {
2003                     error_report("Failed in parsing snapshot param '%s'",
2004                                  optarg);
2005                     goto fail_getopt;
2006                 }
2007             } else {
2008                 snapshot_name = optarg;
2009             }
2010             break;
2011         case 'S':
2012         {
2013             int64_t sval;
2014 
2015             sval = cvtnum(optarg);
2016             if (sval < 0) {
2017                 error_report("Invalid minimum zero buffer size for sparse output specified");
2018                 goto fail_getopt;
2019             }
2020 
2021             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2022             break;
2023         }
2024         case 'p':
2025             progress = true;
2026             break;
2027         case 't':
2028             cache = optarg;
2029             break;
2030         case 'T':
2031             src_cache = optarg;
2032             break;
2033         case 'q':
2034             quiet = true;
2035             break;
2036         case 'n':
2037             skip_create = true;
2038             break;
2039         case 'm':
2040             if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2041                 s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2042                 error_report("Invalid number of coroutines. Allowed number of"
2043                              " coroutines is between 1 and %d", MAX_COROUTINES);
2044                 goto fail_getopt;
2045             }
2046             break;
2047         case 'W':
2048             s.wr_in_order = false;
2049             break;
2050         case 'U':
2051             force_share = true;
2052             break;
2053         case OPTION_OBJECT: {
2054             QemuOpts *object_opts;
2055             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
2056                                                   optarg, true);
2057             if (!object_opts) {
2058                 goto fail_getopt;
2059             }
2060             break;
2061         }
2062         case OPTION_IMAGE_OPTS:
2063             image_opts = true;
2064             break;
2065         }
2066     }
2067 
2068     if (qemu_opts_foreach(&qemu_object_opts,
2069                           user_creatable_add_opts_foreach,
2070                           NULL, NULL)) {
2071         goto fail_getopt;
2072     }
2073 
2074     if (!s.wr_in_order && s.compressed) {
2075         error_report("Out of order write and compress are mutually exclusive");
2076         goto fail_getopt;
2077     }
2078 
2079     s.src_num = argc - optind - 1;
2080     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2081 
2082     if (options && has_help_option(options)) {
2083         ret = print_block_option_help(out_filename, out_fmt);
2084         goto fail_getopt;
2085     }
2086 
2087     if (s.src_num < 1) {
2088         error_report("Must specify image file name");
2089         goto fail_getopt;
2090     }
2091 
2092 
2093     /* ret is still -EINVAL until here */
2094     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2095     if (ret < 0) {
2096         error_report("Invalid source cache option: %s", src_cache);
2097         goto fail_getopt;
2098     }
2099 
2100     /* Initialize before goto out */
2101     if (quiet) {
2102         progress = false;
2103     }
2104     qemu_progress_init(progress, 1.0);
2105     qemu_progress_print(0, 100);
2106 
2107     s.src = g_new0(BlockBackend *, s.src_num);
2108     s.src_sectors = g_new(int64_t, s.src_num);
2109 
2110     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2111         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2112                                fmt, src_flags, src_writethrough, quiet,
2113                                force_share);
2114         if (!s.src[bs_i]) {
2115             ret = -1;
2116             goto out;
2117         }
2118         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2119         if (s.src_sectors[bs_i] < 0) {
2120             error_report("Could not get size of %s: %s",
2121                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2122             ret = -1;
2123             goto out;
2124         }
2125         s.total_sectors += s.src_sectors[bs_i];
2126     }
2127 
2128     if (sn_opts) {
2129         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2130                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2131                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2132                                &local_err);
2133     } else if (snapshot_name != NULL) {
2134         if (s.src_num > 1) {
2135             error_report("No support for concatenating multiple snapshot");
2136             ret = -1;
2137             goto out;
2138         }
2139 
2140         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2141                                              &local_err);
2142     }
2143     if (local_err) {
2144         error_reportf_err(local_err, "Failed to load snapshot: ");
2145         ret = -1;
2146         goto out;
2147     }
2148 
2149     /* Find driver and parse its options */
2150     drv = bdrv_find_format(out_fmt);
2151     if (!drv) {
2152         error_report("Unknown file format '%s'", out_fmt);
2153         ret = -1;
2154         goto out;
2155     }
2156 
2157     proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2158     if (!proto_drv) {
2159         error_report_err(local_err);
2160         ret = -1;
2161         goto out;
2162     }
2163 
2164     if (!skip_create) {
2165         if (!drv->create_opts) {
2166             error_report("Format driver '%s' does not support image creation",
2167                          drv->format_name);
2168             ret = -1;
2169             goto out;
2170         }
2171 
2172         if (!proto_drv->create_opts) {
2173             error_report("Protocol driver '%s' does not support image creation",
2174                          proto_drv->format_name);
2175             ret = -1;
2176             goto out;
2177         }
2178 
2179         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2180         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2181 
2182         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2183         if (options) {
2184             qemu_opts_do_parse(opts, options, NULL, &local_err);
2185             if (local_err) {
2186                 error_report_err(local_err);
2187                 ret = -1;
2188                 goto out;
2189             }
2190         }
2191 
2192         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s.total_sectors * 512,
2193                             &error_abort);
2194         ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
2195         if (ret < 0) {
2196             goto out;
2197         }
2198     }
2199 
2200     /* Get backing file name if -o backing_file was used */
2201     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2202     if (out_baseimg_param) {
2203         out_baseimg = out_baseimg_param;
2204     }
2205     s.target_has_backing = (bool) out_baseimg;
2206 
2207     if (s.src_num > 1 && out_baseimg) {
2208         error_report("Having a backing file for the target makes no sense when "
2209                      "concatenating multiple input images");
2210         ret = -1;
2211         goto out;
2212     }
2213 
2214     /* Check if compression is supported */
2215     if (s.compressed) {
2216         bool encryption =
2217             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2218         const char *preallocation =
2219             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2220 
2221         if (!drv->bdrv_co_pwritev_compressed) {
2222             error_report("Compression not supported for this file format");
2223             ret = -1;
2224             goto out;
2225         }
2226 
2227         if (encryption) {
2228             error_report("Compression and encryption not supported at "
2229                          "the same time");
2230             ret = -1;
2231             goto out;
2232         }
2233 
2234         if (preallocation
2235             && strcmp(preallocation, "off"))
2236         {
2237             error_report("Compression and preallocation not supported at "
2238                          "the same time");
2239             ret = -1;
2240             goto out;
2241         }
2242     }
2243 
2244     if (!skip_create) {
2245         /* Create the new image */
2246         ret = bdrv_create(drv, out_filename, opts, &local_err);
2247         if (ret < 0) {
2248             error_reportf_err(local_err, "%s: error while converting %s: ",
2249                               out_filename, out_fmt);
2250             goto out;
2251         }
2252     }
2253 
2254     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2255     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2256     if (ret < 0) {
2257         error_report("Invalid cache option: %s", cache);
2258         goto out;
2259     }
2260 
2261     /* XXX we should allow --image-opts to trigger use of
2262      * img_open() here, but then we have trouble with
2263      * the bdrv_create() call which takes different params.
2264      * Not critical right now, so fix can wait...
2265      */
2266     s.target = img_open_file(out_filename, out_fmt, flags, writethrough, quiet,
2267                              false);
2268     if (!s.target) {
2269         ret = -1;
2270         goto out;
2271     }
2272     out_bs = blk_bs(s.target);
2273 
2274     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2275      * or discard_alignment of the out_bs is greater. Limit to 32768 (16MB)
2276      * as maximum. */
2277     s.buf_sectors = MIN(32768,
2278                         MAX(s.buf_sectors,
2279                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2280                                 out_bs->bl.pdiscard_alignment >>
2281                                 BDRV_SECTOR_BITS)));
2282 
2283     if (skip_create) {
2284         int64_t output_sectors = blk_nb_sectors(s.target);
2285         if (output_sectors < 0) {
2286             error_report("unable to get output image length: %s",
2287                          strerror(-output_sectors));
2288             ret = -1;
2289             goto out;
2290         } else if (output_sectors < s.total_sectors) {
2291             error_report("output file is smaller than input file");
2292             ret = -1;
2293             goto out;
2294         }
2295     }
2296 
2297     ret = bdrv_get_info(out_bs, &bdi);
2298     if (ret < 0) {
2299         if (s.compressed) {
2300             error_report("could not get block driver info");
2301             goto out;
2302         }
2303     } else {
2304         s.compressed = s.compressed || bdi.needs_compressed_writes;
2305         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2306     }
2307 
2308     ret = convert_do_copy(&s);
2309 out:
2310     if (!ret) {
2311         qemu_progress_print(100, 0);
2312     }
2313     qemu_progress_end();
2314     qemu_opts_del(opts);
2315     qemu_opts_free(create_opts);
2316     qemu_opts_del(sn_opts);
2317     blk_unref(s.target);
2318     if (s.src) {
2319         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2320             blk_unref(s.src[bs_i]);
2321         }
2322         g_free(s.src);
2323     }
2324     g_free(s.src_sectors);
2325 fail_getopt:
2326     g_free(options);
2327 
2328     return !!ret;
2329 }
2330 
2331 
2332 static void dump_snapshots(BlockDriverState *bs)
2333 {
2334     QEMUSnapshotInfo *sn_tab, *sn;
2335     int nb_sns, i;
2336 
2337     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2338     if (nb_sns <= 0)
2339         return;
2340     printf("Snapshot list:\n");
2341     bdrv_snapshot_dump(fprintf, stdout, NULL);
2342     printf("\n");
2343     for(i = 0; i < nb_sns; i++) {
2344         sn = &sn_tab[i];
2345         bdrv_snapshot_dump(fprintf, stdout, sn);
2346         printf("\n");
2347     }
2348     g_free(sn_tab);
2349 }
2350 
2351 static void dump_json_image_info_list(ImageInfoList *list)
2352 {
2353     QString *str;
2354     QObject *obj;
2355     Visitor *v = qobject_output_visitor_new(&obj);
2356 
2357     visit_type_ImageInfoList(v, NULL, &list, &error_abort);
2358     visit_complete(v, &obj);
2359     str = qobject_to_json_pretty(obj);
2360     assert(str != NULL);
2361     printf("%s\n", qstring_get_str(str));
2362     qobject_decref(obj);
2363     visit_free(v);
2364     QDECREF(str);
2365 }
2366 
2367 static void dump_json_image_info(ImageInfo *info)
2368 {
2369     QString *str;
2370     QObject *obj;
2371     Visitor *v = qobject_output_visitor_new(&obj);
2372 
2373     visit_type_ImageInfo(v, NULL, &info, &error_abort);
2374     visit_complete(v, &obj);
2375     str = qobject_to_json_pretty(obj);
2376     assert(str != NULL);
2377     printf("%s\n", qstring_get_str(str));
2378     qobject_decref(obj);
2379     visit_free(v);
2380     QDECREF(str);
2381 }
2382 
2383 static void dump_human_image_info_list(ImageInfoList *list)
2384 {
2385     ImageInfoList *elem;
2386     bool delim = false;
2387 
2388     for (elem = list; elem; elem = elem->next) {
2389         if (delim) {
2390             printf("\n");
2391         }
2392         delim = true;
2393 
2394         bdrv_image_info_dump(fprintf, stdout, elem->value);
2395     }
2396 }
2397 
2398 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2399 {
2400     return strcmp(a, b) == 0;
2401 }
2402 
2403 /**
2404  * Open an image file chain and return an ImageInfoList
2405  *
2406  * @filename: topmost image filename
2407  * @fmt: topmost image format (may be NULL to autodetect)
2408  * @chain: true  - enumerate entire backing file chain
2409  *         false - only topmost image file
2410  *
2411  * Returns a list of ImageInfo objects or NULL if there was an error opening an
2412  * image file.  If there was an error a message will have been printed to
2413  * stderr.
2414  */
2415 static ImageInfoList *collect_image_info_list(bool image_opts,
2416                                               const char *filename,
2417                                               const char *fmt,
2418                                               bool chain, bool force_share)
2419 {
2420     ImageInfoList *head = NULL;
2421     ImageInfoList **last = &head;
2422     GHashTable *filenames;
2423     Error *err = NULL;
2424 
2425     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2426 
2427     while (filename) {
2428         BlockBackend *blk;
2429         BlockDriverState *bs;
2430         ImageInfo *info;
2431         ImageInfoList *elem;
2432 
2433         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2434             error_report("Backing file '%s' creates an infinite loop.",
2435                          filename);
2436             goto err;
2437         }
2438         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2439 
2440         blk = img_open(image_opts, filename, fmt,
2441                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
2442                        force_share);
2443         if (!blk) {
2444             goto err;
2445         }
2446         bs = blk_bs(blk);
2447 
2448         bdrv_query_image_info(bs, &info, &err);
2449         if (err) {
2450             error_report_err(err);
2451             blk_unref(blk);
2452             goto err;
2453         }
2454 
2455         elem = g_new0(ImageInfoList, 1);
2456         elem->value = info;
2457         *last = elem;
2458         last = &elem->next;
2459 
2460         blk_unref(blk);
2461 
2462         filename = fmt = NULL;
2463         if (chain) {
2464             if (info->has_full_backing_filename) {
2465                 filename = info->full_backing_filename;
2466             } else if (info->has_backing_filename) {
2467                 error_report("Could not determine absolute backing filename,"
2468                              " but backing filename '%s' present",
2469                              info->backing_filename);
2470                 goto err;
2471             }
2472             if (info->has_backing_filename_format) {
2473                 fmt = info->backing_filename_format;
2474             }
2475         }
2476     }
2477     g_hash_table_destroy(filenames);
2478     return head;
2479 
2480 err:
2481     qapi_free_ImageInfoList(head);
2482     g_hash_table_destroy(filenames);
2483     return NULL;
2484 }
2485 
2486 static int img_info(int argc, char **argv)
2487 {
2488     int c;
2489     OutputFormat output_format = OFORMAT_HUMAN;
2490     bool chain = false;
2491     const char *filename, *fmt, *output;
2492     ImageInfoList *list;
2493     bool image_opts = false;
2494     bool force_share = false;
2495 
2496     fmt = NULL;
2497     output = NULL;
2498     for(;;) {
2499         int option_index = 0;
2500         static const struct option long_options[] = {
2501             {"help", no_argument, 0, 'h'},
2502             {"format", required_argument, 0, 'f'},
2503             {"output", required_argument, 0, OPTION_OUTPUT},
2504             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
2505             {"object", required_argument, 0, OPTION_OBJECT},
2506             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2507             {"force-share", no_argument, 0, 'U'},
2508             {0, 0, 0, 0}
2509         };
2510         c = getopt_long(argc, argv, ":f:hU",
2511                         long_options, &option_index);
2512         if (c == -1) {
2513             break;
2514         }
2515         switch(c) {
2516         case ':':
2517             missing_argument(argv[optind - 1]);
2518             break;
2519         case '?':
2520             unrecognized_option(argv[optind - 1]);
2521             break;
2522         case 'h':
2523             help();
2524             break;
2525         case 'f':
2526             fmt = optarg;
2527             break;
2528         case 'U':
2529             force_share = true;
2530             break;
2531         case OPTION_OUTPUT:
2532             output = optarg;
2533             break;
2534         case OPTION_BACKING_CHAIN:
2535             chain = true;
2536             break;
2537         case OPTION_OBJECT: {
2538             QemuOpts *opts;
2539             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2540                                            optarg, true);
2541             if (!opts) {
2542                 return 1;
2543             }
2544         }   break;
2545         case OPTION_IMAGE_OPTS:
2546             image_opts = true;
2547             break;
2548         }
2549     }
2550     if (optind != argc - 1) {
2551         error_exit("Expecting one image file name");
2552     }
2553     filename = argv[optind++];
2554 
2555     if (output && !strcmp(output, "json")) {
2556         output_format = OFORMAT_JSON;
2557     } else if (output && !strcmp(output, "human")) {
2558         output_format = OFORMAT_HUMAN;
2559     } else if (output) {
2560         error_report("--output must be used with human or json as argument.");
2561         return 1;
2562     }
2563 
2564     if (qemu_opts_foreach(&qemu_object_opts,
2565                           user_creatable_add_opts_foreach,
2566                           NULL, NULL)) {
2567         return 1;
2568     }
2569 
2570     list = collect_image_info_list(image_opts, filename, fmt, chain,
2571                                    force_share);
2572     if (!list) {
2573         return 1;
2574     }
2575 
2576     switch (output_format) {
2577     case OFORMAT_HUMAN:
2578         dump_human_image_info_list(list);
2579         break;
2580     case OFORMAT_JSON:
2581         if (chain) {
2582             dump_json_image_info_list(list);
2583         } else {
2584             dump_json_image_info(list->value);
2585         }
2586         break;
2587     }
2588 
2589     qapi_free_ImageInfoList(list);
2590     return 0;
2591 }
2592 
2593 static void dump_map_entry(OutputFormat output_format, MapEntry *e,
2594                            MapEntry *next)
2595 {
2596     switch (output_format) {
2597     case OFORMAT_HUMAN:
2598         if (e->data && !e->has_offset) {
2599             error_report("File contains external, encrypted or compressed clusters.");
2600             exit(1);
2601         }
2602         if (e->data && !e->zero) {
2603             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
2604                    e->start, e->length,
2605                    e->has_offset ? e->offset : 0,
2606                    e->has_filename ? e->filename : "");
2607         }
2608         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
2609          * Modify the flags here to allow more coalescing.
2610          */
2611         if (next && (!next->data || next->zero)) {
2612             next->data = false;
2613             next->zero = true;
2614         }
2615         break;
2616     case OFORMAT_JSON:
2617         printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
2618                " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
2619                (e->start == 0 ? "[" : ",\n"),
2620                e->start, e->length, e->depth,
2621                e->zero ? "true" : "false",
2622                e->data ? "true" : "false");
2623         if (e->has_offset) {
2624             printf(", \"offset\": %"PRId64"", e->offset);
2625         }
2626         putchar('}');
2627 
2628         if (!next) {
2629             printf("]\n");
2630         }
2631         break;
2632     }
2633 }
2634 
2635 static int get_block_status(BlockDriverState *bs, int64_t sector_num,
2636                             int nb_sectors, MapEntry *e)
2637 {
2638     int64_t ret;
2639     int depth;
2640     BlockDriverState *file;
2641     bool has_offset;
2642 
2643     /* As an optimization, we could cache the current range of unallocated
2644      * clusters in each file of the chain, and avoid querying the same
2645      * range repeatedly.
2646      */
2647 
2648     depth = 0;
2649     for (;;) {
2650         ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &nb_sectors,
2651                                     &file);
2652         if (ret < 0) {
2653             return ret;
2654         }
2655         assert(nb_sectors);
2656         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
2657             break;
2658         }
2659         bs = backing_bs(bs);
2660         if (bs == NULL) {
2661             ret = 0;
2662             break;
2663         }
2664 
2665         depth++;
2666     }
2667 
2668     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
2669 
2670     *e = (MapEntry) {
2671         .start = sector_num * BDRV_SECTOR_SIZE,
2672         .length = nb_sectors * BDRV_SECTOR_SIZE,
2673         .data = !!(ret & BDRV_BLOCK_DATA),
2674         .zero = !!(ret & BDRV_BLOCK_ZERO),
2675         .offset = ret & BDRV_BLOCK_OFFSET_MASK,
2676         .has_offset = has_offset,
2677         .depth = depth,
2678         .has_filename = file && has_offset,
2679         .filename = file && has_offset ? file->filename : NULL,
2680     };
2681 
2682     return 0;
2683 }
2684 
2685 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
2686 {
2687     if (curr->length == 0) {
2688         return false;
2689     }
2690     if (curr->zero != next->zero ||
2691         curr->data != next->data ||
2692         curr->depth != next->depth ||
2693         curr->has_filename != next->has_filename ||
2694         curr->has_offset != next->has_offset) {
2695         return false;
2696     }
2697     if (curr->has_filename && strcmp(curr->filename, next->filename)) {
2698         return false;
2699     }
2700     if (curr->has_offset && curr->offset + curr->length != next->offset) {
2701         return false;
2702     }
2703     return true;
2704 }
2705 
2706 static int img_map(int argc, char **argv)
2707 {
2708     int c;
2709     OutputFormat output_format = OFORMAT_HUMAN;
2710     BlockBackend *blk;
2711     BlockDriverState *bs;
2712     const char *filename, *fmt, *output;
2713     int64_t length;
2714     MapEntry curr = { .length = 0 }, next;
2715     int ret = 0;
2716     bool image_opts = false;
2717     bool force_share = false;
2718 
2719     fmt = NULL;
2720     output = NULL;
2721     for (;;) {
2722         int option_index = 0;
2723         static const struct option long_options[] = {
2724             {"help", no_argument, 0, 'h'},
2725             {"format", required_argument, 0, 'f'},
2726             {"output", required_argument, 0, OPTION_OUTPUT},
2727             {"object", required_argument, 0, OPTION_OBJECT},
2728             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2729             {"force-share", no_argument, 0, 'U'},
2730             {0, 0, 0, 0}
2731         };
2732         c = getopt_long(argc, argv, ":f:hU",
2733                         long_options, &option_index);
2734         if (c == -1) {
2735             break;
2736         }
2737         switch (c) {
2738         case ':':
2739             missing_argument(argv[optind - 1]);
2740             break;
2741         case '?':
2742             unrecognized_option(argv[optind - 1]);
2743             break;
2744         case 'h':
2745             help();
2746             break;
2747         case 'f':
2748             fmt = optarg;
2749             break;
2750         case 'U':
2751             force_share = true;
2752             break;
2753         case OPTION_OUTPUT:
2754             output = optarg;
2755             break;
2756         case OPTION_OBJECT: {
2757             QemuOpts *opts;
2758             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2759                                            optarg, true);
2760             if (!opts) {
2761                 return 1;
2762             }
2763         }   break;
2764         case OPTION_IMAGE_OPTS:
2765             image_opts = true;
2766             break;
2767         }
2768     }
2769     if (optind != argc - 1) {
2770         error_exit("Expecting one image file name");
2771     }
2772     filename = argv[optind];
2773 
2774     if (output && !strcmp(output, "json")) {
2775         output_format = OFORMAT_JSON;
2776     } else if (output && !strcmp(output, "human")) {
2777         output_format = OFORMAT_HUMAN;
2778     } else if (output) {
2779         error_report("--output must be used with human or json as argument.");
2780         return 1;
2781     }
2782 
2783     if (qemu_opts_foreach(&qemu_object_opts,
2784                           user_creatable_add_opts_foreach,
2785                           NULL, NULL)) {
2786         return 1;
2787     }
2788 
2789     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
2790     if (!blk) {
2791         return 1;
2792     }
2793     bs = blk_bs(blk);
2794 
2795     if (output_format == OFORMAT_HUMAN) {
2796         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
2797     }
2798 
2799     length = blk_getlength(blk);
2800     while (curr.start + curr.length < length) {
2801         int64_t nsectors_left;
2802         int64_t sector_num;
2803         int n;
2804 
2805         sector_num = (curr.start + curr.length) >> BDRV_SECTOR_BITS;
2806 
2807         /* Probe up to 1 GiB at a time.  */
2808         nsectors_left = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE) - sector_num;
2809         n = MIN(1 << (30 - BDRV_SECTOR_BITS), nsectors_left);
2810         ret = get_block_status(bs, sector_num, n, &next);
2811 
2812         if (ret < 0) {
2813             error_report("Could not read file metadata: %s", strerror(-ret));
2814             goto out;
2815         }
2816 
2817         if (entry_mergeable(&curr, &next)) {
2818             curr.length += next.length;
2819             continue;
2820         }
2821 
2822         if (curr.length > 0) {
2823             dump_map_entry(output_format, &curr, &next);
2824         }
2825         curr = next;
2826     }
2827 
2828     dump_map_entry(output_format, &curr, NULL);
2829 
2830 out:
2831     blk_unref(blk);
2832     return ret < 0;
2833 }
2834 
/*
 * Action codes stored in img_snapshot()'s 'action' variable.  All values
 * are non-zero because 0 is that variable's initial value and is tested to
 * detect that no action has been selected yet.
 */
#define SNAPSHOT_LIST   1
#define SNAPSHOT_CREATE 2
#define SNAPSHOT_APPLY  3
#define SNAPSHOT_DELETE 4
2839 
2840 static int img_snapshot(int argc, char **argv)
2841 {
2842     BlockBackend *blk;
2843     BlockDriverState *bs;
2844     QEMUSnapshotInfo sn;
2845     char *filename, *snapshot_name = NULL;
2846     int c, ret = 0, bdrv_oflags;
2847     int action = 0;
2848     qemu_timeval tv;
2849     bool quiet = false;
2850     Error *err = NULL;
2851     bool image_opts = false;
2852     bool force_share = false;
2853 
2854     bdrv_oflags = BDRV_O_RDWR;
2855     /* Parse commandline parameters */
2856     for(;;) {
2857         static const struct option long_options[] = {
2858             {"help", no_argument, 0, 'h'},
2859             {"object", required_argument, 0, OPTION_OBJECT},
2860             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2861             {"force-share", no_argument, 0, 'U'},
2862             {0, 0, 0, 0}
2863         };
2864         c = getopt_long(argc, argv, ":la:c:d:hqU",
2865                         long_options, NULL);
2866         if (c == -1) {
2867             break;
2868         }
2869         switch(c) {
2870         case ':':
2871             missing_argument(argv[optind - 1]);
2872             break;
2873         case '?':
2874             unrecognized_option(argv[optind - 1]);
2875             break;
2876         case 'h':
2877             help();
2878             return 0;
2879         case 'l':
2880             if (action) {
2881                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2882                 return 0;
2883             }
2884             action = SNAPSHOT_LIST;
2885             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
2886             break;
2887         case 'a':
2888             if (action) {
2889                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2890                 return 0;
2891             }
2892             action = SNAPSHOT_APPLY;
2893             snapshot_name = optarg;
2894             break;
2895         case 'c':
2896             if (action) {
2897                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2898                 return 0;
2899             }
2900             action = SNAPSHOT_CREATE;
2901             snapshot_name = optarg;
2902             break;
2903         case 'd':
2904             if (action) {
2905                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2906                 return 0;
2907             }
2908             action = SNAPSHOT_DELETE;
2909             snapshot_name = optarg;
2910             break;
2911         case 'q':
2912             quiet = true;
2913             break;
2914         case 'U':
2915             force_share = true;
2916             break;
2917         case OPTION_OBJECT: {
2918             QemuOpts *opts;
2919             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2920                                            optarg, true);
2921             if (!opts) {
2922                 return 1;
2923             }
2924         }   break;
2925         case OPTION_IMAGE_OPTS:
2926             image_opts = true;
2927             break;
2928         }
2929     }
2930 
2931     if (optind != argc - 1) {
2932         error_exit("Expecting one image file name");
2933     }
2934     filename = argv[optind++];
2935 
2936     if (qemu_opts_foreach(&qemu_object_opts,
2937                           user_creatable_add_opts_foreach,
2938                           NULL, NULL)) {
2939         return 1;
2940     }
2941 
2942     /* Open the image */
2943     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
2944                    force_share);
2945     if (!blk) {
2946         return 1;
2947     }
2948     bs = blk_bs(blk);
2949 
2950     /* Perform the requested action */
2951     switch(action) {
2952     case SNAPSHOT_LIST:
2953         dump_snapshots(bs);
2954         break;
2955 
2956     case SNAPSHOT_CREATE:
2957         memset(&sn, 0, sizeof(sn));
2958         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
2959 
2960         qemu_gettimeofday(&tv);
2961         sn.date_sec = tv.tv_sec;
2962         sn.date_nsec = tv.tv_usec * 1000;
2963 
2964         ret = bdrv_snapshot_create(bs, &sn);
2965         if (ret) {
2966             error_report("Could not create snapshot '%s': %d (%s)",
2967                 snapshot_name, ret, strerror(-ret));
2968         }
2969         break;
2970 
2971     case SNAPSHOT_APPLY:
2972         ret = bdrv_snapshot_goto(bs, snapshot_name);
2973         if (ret) {
2974             error_report("Could not apply snapshot '%s': %d (%s)",
2975                 snapshot_name, ret, strerror(-ret));
2976         }
2977         break;
2978 
2979     case SNAPSHOT_DELETE:
2980         bdrv_snapshot_delete_by_id_or_name(bs, snapshot_name, &err);
2981         if (err) {
2982             error_reportf_err(err, "Could not delete snapshot '%s': ",
2983                               snapshot_name);
2984             ret = 1;
2985         }
2986         break;
2987     }
2988 
2989     /* Cleanup */
2990     blk_unref(blk);
2991     if (ret) {
2992         return 1;
2993     }
2994     return 0;
2995 }
2996 
2997 static int img_rebase(int argc, char **argv)
2998 {
2999     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3000     uint8_t *buf_old = NULL;
3001     uint8_t *buf_new = NULL;
3002     BlockDriverState *bs = NULL;
3003     char *filename;
3004     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3005     int c, flags, src_flags, ret;
3006     bool writethrough, src_writethrough;
3007     int unsafe = 0;
3008     bool force_share = false;
3009     int progress = 0;
3010     bool quiet = false;
3011     Error *local_err = NULL;
3012     bool image_opts = false;
3013 
3014     /* Parse commandline parameters */
3015     fmt = NULL;
3016     cache = BDRV_DEFAULT_CACHE;
3017     src_cache = BDRV_DEFAULT_CACHE;
3018     out_baseimg = NULL;
3019     out_basefmt = NULL;
3020     for(;;) {
3021         static const struct option long_options[] = {
3022             {"help", no_argument, 0, 'h'},
3023             {"object", required_argument, 0, OPTION_OBJECT},
3024             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3025             {"force-share", no_argument, 0, 'U'},
3026             {0, 0, 0, 0}
3027         };
3028         c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
3029                         long_options, NULL);
3030         if (c == -1) {
3031             break;
3032         }
3033         switch(c) {
3034         case ':':
3035             missing_argument(argv[optind - 1]);
3036             break;
3037         case '?':
3038             unrecognized_option(argv[optind - 1]);
3039             break;
3040         case 'h':
3041             help();
3042             return 0;
3043         case 'f':
3044             fmt = optarg;
3045             break;
3046         case 'F':
3047             out_basefmt = optarg;
3048             break;
3049         case 'b':
3050             out_baseimg = optarg;
3051             break;
3052         case 'u':
3053             unsafe = 1;
3054             break;
3055         case 'p':
3056             progress = 1;
3057             break;
3058         case 't':
3059             cache = optarg;
3060             break;
3061         case 'T':
3062             src_cache = optarg;
3063             break;
3064         case 'q':
3065             quiet = true;
3066             break;
3067         case OPTION_OBJECT: {
3068             QemuOpts *opts;
3069             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3070                                            optarg, true);
3071             if (!opts) {
3072                 return 1;
3073             }
3074         }   break;
3075         case OPTION_IMAGE_OPTS:
3076             image_opts = true;
3077             break;
3078         case 'U':
3079             force_share = true;
3080             break;
3081         }
3082     }
3083 
3084     if (quiet) {
3085         progress = 0;
3086     }
3087 
3088     if (optind != argc - 1) {
3089         error_exit("Expecting one image file name");
3090     }
3091     if (!unsafe && !out_baseimg) {
3092         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3093     }
3094     filename = argv[optind++];
3095 
3096     if (qemu_opts_foreach(&qemu_object_opts,
3097                           user_creatable_add_opts_foreach,
3098                           NULL, NULL)) {
3099         return 1;
3100     }
3101 
3102     qemu_progress_init(progress, 2.0);
3103     qemu_progress_print(0, 100);
3104 
3105     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3106     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3107     if (ret < 0) {
3108         error_report("Invalid cache option: %s", cache);
3109         goto out;
3110     }
3111 
3112     src_flags = 0;
3113     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3114     if (ret < 0) {
3115         error_report("Invalid source cache option: %s", src_cache);
3116         goto out;
3117     }
3118 
3119     /* The source files are opened read-only, don't care about WCE */
3120     assert((src_flags & BDRV_O_RDWR) == 0);
3121     (void) src_writethrough;
3122 
3123     /*
3124      * Open the images.
3125      *
3126      * Ignore the old backing file for unsafe rebase in case we want to correct
3127      * the reference to a renamed or moved backing file.
3128      */
3129     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3130                    false);
3131     if (!blk) {
3132         ret = -1;
3133         goto out;
3134     }
3135     bs = blk_bs(blk);
3136 
3137     if (out_basefmt != NULL) {
3138         if (bdrv_find_format(out_basefmt) == NULL) {
3139             error_report("Invalid format name: '%s'", out_basefmt);
3140             ret = -1;
3141             goto out;
3142         }
3143     }
3144 
3145     /* For safe rebasing we need to compare old and new backing file */
3146     if (!unsafe) {
3147         char backing_name[PATH_MAX];
3148         QDict *options = NULL;
3149 
3150         if (bs->backing_format[0] != '\0') {
3151             options = qdict_new();
3152             qdict_put_str(options, "driver", bs->backing_format);
3153         }
3154 
3155         if (force_share) {
3156             if (!options) {
3157                 options = qdict_new();
3158             }
3159             qdict_put(options, BDRV_OPT_FORCE_SHARE,
3160                       qbool_from_bool(true));
3161         }
3162         bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
3163         blk_old_backing = blk_new_open(backing_name, NULL,
3164                                        options, src_flags, &local_err);
3165         if (!blk_old_backing) {
3166             error_reportf_err(local_err,
3167                               "Could not open old backing file '%s': ",
3168                               backing_name);
3169             ret = -1;
3170             goto out;
3171         }
3172 
3173         if (out_baseimg[0]) {
3174             options = qdict_new();
3175             if (out_basefmt) {
3176                 qdict_put_str(options, "driver", out_basefmt);
3177             }
3178             if (force_share) {
3179                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3180             }
3181 
3182             blk_new_backing = blk_new_open(out_baseimg, NULL,
3183                                            options, src_flags, &local_err);
3184             if (!blk_new_backing) {
3185                 error_reportf_err(local_err,
3186                                   "Could not open new backing file '%s': ",
3187                                   out_baseimg);
3188                 ret = -1;
3189                 goto out;
3190             }
3191         }
3192     }
3193 
3194     /*
3195      * Check each unallocated cluster in the COW file. If it is unallocated,
3196      * accesses go to the backing file. We must therefore compare this cluster
3197      * in the old and new backing file, and if they differ we need to copy it
3198      * from the old backing file into the COW file.
3199      *
3200      * If qemu-img crashes during this step, no harm is done. The content of
3201      * the image is the same as the original one at any time.
3202      */
3203     if (!unsafe) {
3204         int64_t num_sectors;
3205         int64_t old_backing_num_sectors;
3206         int64_t new_backing_num_sectors = 0;
3207         uint64_t sector;
3208         int n;
3209         float local_progress = 0;
3210 
3211         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3212         buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3213 
3214         num_sectors = blk_nb_sectors(blk);
3215         if (num_sectors < 0) {
3216             error_report("Could not get size of '%s': %s",
3217                          filename, strerror(-num_sectors));
3218             ret = -1;
3219             goto out;
3220         }
3221         old_backing_num_sectors = blk_nb_sectors(blk_old_backing);
3222         if (old_backing_num_sectors < 0) {
3223             char backing_name[PATH_MAX];
3224 
3225             bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
3226             error_report("Could not get size of '%s': %s",
3227                          backing_name, strerror(-old_backing_num_sectors));
3228             ret = -1;
3229             goto out;
3230         }
3231         if (blk_new_backing) {
3232             new_backing_num_sectors = blk_nb_sectors(blk_new_backing);
3233             if (new_backing_num_sectors < 0) {
3234                 error_report("Could not get size of '%s': %s",
3235                              out_baseimg, strerror(-new_backing_num_sectors));
3236                 ret = -1;
3237                 goto out;
3238             }
3239         }
3240 
3241         if (num_sectors != 0) {
3242             local_progress = (float)100 /
3243                 (num_sectors / MIN(num_sectors, IO_BUF_SIZE / 512));
3244         }
3245 
3246         for (sector = 0; sector < num_sectors; sector += n) {
3247 
3248             /* How many sectors can we handle with the next read? */
3249             if (sector + (IO_BUF_SIZE / 512) <= num_sectors) {
3250                 n = (IO_BUF_SIZE / 512);
3251             } else {
3252                 n = num_sectors - sector;
3253             }
3254 
3255             /* If the cluster is allocated, we don't need to take action */
3256             ret = bdrv_is_allocated(bs, sector, n, &n);
3257             if (ret < 0) {
3258                 error_report("error while reading image metadata: %s",
3259                              strerror(-ret));
3260                 goto out;
3261             }
3262             if (ret) {
3263                 continue;
3264             }
3265 
3266             /*
3267              * Read old and new backing file and take into consideration that
3268              * backing files may be smaller than the COW image.
3269              */
3270             if (sector >= old_backing_num_sectors) {
3271                 memset(buf_old, 0, n * BDRV_SECTOR_SIZE);
3272             } else {
3273                 if (sector + n > old_backing_num_sectors) {
3274                     n = old_backing_num_sectors - sector;
3275                 }
3276 
3277                 ret = blk_pread(blk_old_backing, sector << BDRV_SECTOR_BITS,
3278                                 buf_old, n << BDRV_SECTOR_BITS);
3279                 if (ret < 0) {
3280                     error_report("error while reading from old backing file");
3281                     goto out;
3282                 }
3283             }
3284 
3285             if (sector >= new_backing_num_sectors || !blk_new_backing) {
3286                 memset(buf_new, 0, n * BDRV_SECTOR_SIZE);
3287             } else {
3288                 if (sector + n > new_backing_num_sectors) {
3289                     n = new_backing_num_sectors - sector;
3290                 }
3291 
3292                 ret = blk_pread(blk_new_backing, sector << BDRV_SECTOR_BITS,
3293                                 buf_new, n << BDRV_SECTOR_BITS);
3294                 if (ret < 0) {
3295                     error_report("error while reading from new backing file");
3296                     goto out;
3297                 }
3298             }
3299 
3300             /* If they differ, we need to write to the COW file */
3301             uint64_t written = 0;
3302 
3303             while (written < n) {
3304                 int pnum;
3305 
3306                 if (compare_sectors(buf_old + written * 512,
3307                     buf_new + written * 512, n - written, &pnum))
3308                 {
3309                     ret = blk_pwrite(blk,
3310                                      (sector + written) << BDRV_SECTOR_BITS,
3311                                      buf_old + written * 512,
3312                                      pnum << BDRV_SECTOR_BITS, 0);
3313                     if (ret < 0) {
3314                         error_report("Error while writing to COW image: %s",
3315                             strerror(-ret));
3316                         goto out;
3317                     }
3318                 }
3319 
3320                 written += pnum;
3321             }
3322             qemu_progress_print(local_progress, 100);
3323         }
3324     }
3325 
3326     /*
3327      * Change the backing file. All clusters that are different from the old
3328      * backing file are overwritten in the COW file now, so the visible content
3329      * doesn't change when we switch the backing file.
3330      */
3331     if (out_baseimg && *out_baseimg) {
3332         ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
3333     } else {
3334         ret = bdrv_change_backing_file(bs, NULL, NULL);
3335     }
3336 
3337     if (ret == -ENOSPC) {
3338         error_report("Could not change the backing file to '%s': No "
3339                      "space left in the file header", out_baseimg);
3340     } else if (ret < 0) {
3341         error_report("Could not change the backing file to '%s': %s",
3342             out_baseimg, strerror(-ret));
3343     }
3344 
3345     qemu_progress_print(100, 0);
3346     /*
3347      * TODO At this point it is possible to check if any clusters that are
3348      * allocated in the COW file are the same in the backing file. If so, they
3349      * could be dropped from the COW file. Don't do this before switching the
3350      * backing file, in case of a crash this would lead to corruption.
3351      */
3352 out:
3353     qemu_progress_end();
3354     /* Cleanup */
3355     if (!unsafe) {
3356         blk_unref(blk_old_backing);
3357         blk_unref(blk_new_backing);
3358     }
3359     qemu_vfree(buf_old);
3360     qemu_vfree(buf_new);
3361 
3362     blk_unref(blk);
3363     if (ret) {
3364         return 1;
3365     }
3366     return 0;
3367 }
3368 
3369 static int img_resize(int argc, char **argv)
3370 {
3371     Error *err = NULL;
3372     int c, ret, relative;
3373     const char *filename, *fmt, *size;
3374     int64_t n, total_size;
3375     bool quiet = false;
3376     BlockBackend *blk = NULL;
3377     QemuOpts *param;
3378 
3379     static QemuOptsList resize_options = {
3380         .name = "resize_options",
3381         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3382         .desc = {
3383             {
3384                 .name = BLOCK_OPT_SIZE,
3385                 .type = QEMU_OPT_SIZE,
3386                 .help = "Virtual disk size"
3387             }, {
3388                 /* end of list */
3389             }
3390         },
3391     };
3392     bool image_opts = false;
3393 
3394     /* Remove size from argv manually so that negative numbers are not treated
3395      * as options by getopt. */
3396     if (argc < 3) {
3397         error_exit("Not enough arguments");
3398         return 1;
3399     }
3400 
3401     size = argv[--argc];
3402 
3403     /* Parse getopt arguments */
3404     fmt = NULL;
3405     for(;;) {
3406         static const struct option long_options[] = {
3407             {"help", no_argument, 0, 'h'},
3408             {"object", required_argument, 0, OPTION_OBJECT},
3409             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3410             {0, 0, 0, 0}
3411         };
3412         c = getopt_long(argc, argv, ":f:hq",
3413                         long_options, NULL);
3414         if (c == -1) {
3415             break;
3416         }
3417         switch(c) {
3418         case ':':
3419             missing_argument(argv[optind - 1]);
3420             break;
3421         case '?':
3422             unrecognized_option(argv[optind - 1]);
3423             break;
3424         case 'h':
3425             help();
3426             break;
3427         case 'f':
3428             fmt = optarg;
3429             break;
3430         case 'q':
3431             quiet = true;
3432             break;
3433         case OPTION_OBJECT: {
3434             QemuOpts *opts;
3435             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3436                                            optarg, true);
3437             if (!opts) {
3438                 return 1;
3439             }
3440         }   break;
3441         case OPTION_IMAGE_OPTS:
3442             image_opts = true;
3443             break;
3444         }
3445     }
3446     if (optind != argc - 1) {
3447         error_exit("Expecting one image file name");
3448     }
3449     filename = argv[optind++];
3450 
3451     if (qemu_opts_foreach(&qemu_object_opts,
3452                           user_creatable_add_opts_foreach,
3453                           NULL, NULL)) {
3454         return 1;
3455     }
3456 
3457     /* Choose grow, shrink, or absolute resize mode */
3458     switch (size[0]) {
3459     case '+':
3460         relative = 1;
3461         size++;
3462         break;
3463     case '-':
3464         relative = -1;
3465         size++;
3466         break;
3467     default:
3468         relative = 0;
3469         break;
3470     }
3471 
3472     /* Parse size */
3473     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
3474     qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
3475     if (err) {
3476         error_report_err(err);
3477         ret = -1;
3478         qemu_opts_del(param);
3479         goto out;
3480     }
3481     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
3482     qemu_opts_del(param);
3483 
3484     blk = img_open(image_opts, filename, fmt,
3485                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
3486                    false);
3487     if (!blk) {
3488         ret = -1;
3489         goto out;
3490     }
3491 
3492     if (relative) {
3493         total_size = blk_getlength(blk) + n * relative;
3494     } else {
3495         total_size = n;
3496     }
3497     if (total_size <= 0) {
3498         error_report("New image size must be positive");
3499         ret = -1;
3500         goto out;
3501     }
3502 
3503     ret = blk_truncate(blk, total_size, &err);
3504     if (!ret) {
3505         qprintf(quiet, "Image resized.\n");
3506     } else {
3507         error_report_err(err);
3508     }
3509 out:
3510     blk_unref(blk);
3511     if (ret) {
3512         return 1;
3513     }
3514     return 0;
3515 }
3516 
3517 static void amend_status_cb(BlockDriverState *bs,
3518                             int64_t offset, int64_t total_work_size,
3519                             void *opaque)
3520 {
3521     qemu_progress_print(100.f * offset / total_work_size, 0);
3522 }
3523 
3524 static int img_amend(int argc, char **argv)
3525 {
3526     Error *err = NULL;
3527     int c, ret = 0;
3528     char *options = NULL;
3529     QemuOptsList *create_opts = NULL;
3530     QemuOpts *opts = NULL;
3531     const char *fmt = NULL, *filename, *cache;
3532     int flags;
3533     bool writethrough;
3534     bool quiet = false, progress = false;
3535     BlockBackend *blk = NULL;
3536     BlockDriverState *bs = NULL;
3537     bool image_opts = false;
3538 
3539     cache = BDRV_DEFAULT_CACHE;
3540     for (;;) {
3541         static const struct option long_options[] = {
3542             {"help", no_argument, 0, 'h'},
3543             {"object", required_argument, 0, OPTION_OBJECT},
3544             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3545             {0, 0, 0, 0}
3546         };
3547         c = getopt_long(argc, argv, ":ho:f:t:pq",
3548                         long_options, NULL);
3549         if (c == -1) {
3550             break;
3551         }
3552 
3553         switch (c) {
3554         case ':':
3555             missing_argument(argv[optind - 1]);
3556             break;
3557         case '?':
3558             unrecognized_option(argv[optind - 1]);
3559             break;
3560         case 'h':
3561             help();
3562             break;
3563         case 'o':
3564             if (!is_valid_option_list(optarg)) {
3565                 error_report("Invalid option list: %s", optarg);
3566                 ret = -1;
3567                 goto out_no_progress;
3568             }
3569             if (!options) {
3570                 options = g_strdup(optarg);
3571             } else {
3572                 char *old_options = options;
3573                 options = g_strdup_printf("%s,%s", options, optarg);
3574                 g_free(old_options);
3575             }
3576             break;
3577         case 'f':
3578             fmt = optarg;
3579             break;
3580         case 't':
3581             cache = optarg;
3582             break;
3583         case 'p':
3584             progress = true;
3585             break;
3586         case 'q':
3587             quiet = true;
3588             break;
3589         case OPTION_OBJECT:
3590             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3591                                            optarg, true);
3592             if (!opts) {
3593                 ret = -1;
3594                 goto out_no_progress;
3595             }
3596             break;
3597         case OPTION_IMAGE_OPTS:
3598             image_opts = true;
3599             break;
3600         }
3601     }
3602 
3603     if (!options) {
3604         error_exit("Must specify options (-o)");
3605     }
3606 
3607     if (qemu_opts_foreach(&qemu_object_opts,
3608                           user_creatable_add_opts_foreach,
3609                           NULL, NULL)) {
3610         ret = -1;
3611         goto out_no_progress;
3612     }
3613 
3614     if (quiet) {
3615         progress = false;
3616     }
3617     qemu_progress_init(progress, 1.0);
3618 
3619     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
3620     if (fmt && has_help_option(options)) {
3621         /* If a format is explicitly specified (and possibly no filename is
3622          * given), print option help here */
3623         ret = print_block_option_help(filename, fmt);
3624         goto out;
3625     }
3626 
3627     if (optind != argc - 1) {
3628         error_report("Expecting one image file name");
3629         ret = -1;
3630         goto out;
3631     }
3632 
3633     flags = BDRV_O_RDWR;
3634     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3635     if (ret < 0) {
3636         error_report("Invalid cache option: %s", cache);
3637         goto out;
3638     }
3639 
3640     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3641                    false);
3642     if (!blk) {
3643         ret = -1;
3644         goto out;
3645     }
3646     bs = blk_bs(blk);
3647 
3648     fmt = bs->drv->format_name;
3649 
3650     if (has_help_option(options)) {
3651         /* If the format was auto-detected, print option help here */
3652         ret = print_block_option_help(filename, fmt);
3653         goto out;
3654     }
3655 
3656     if (!bs->drv->create_opts) {
3657         error_report("Format driver '%s' does not support any options to amend",
3658                      fmt);
3659         ret = -1;
3660         goto out;
3661     }
3662 
3663     create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
3664     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3665     qemu_opts_do_parse(opts, options, NULL, &err);
3666     if (err) {
3667         error_report_err(err);
3668         ret = -1;
3669         goto out;
3670     }
3671 
3672     /* In case the driver does not call amend_status_cb() */
3673     qemu_progress_print(0.f, 0);
3674     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL);
3675     qemu_progress_print(100.f, 0);
3676     if (ret < 0) {
3677         error_report("Error while amending options: %s", strerror(-ret));
3678         goto out;
3679     }
3680 
3681 out:
3682     qemu_progress_end();
3683 
3684 out_no_progress:
3685     blk_unref(blk);
3686     qemu_opts_del(opts);
3687     qemu_opts_free(create_opts);
3688     g_free(options);
3689 
3690     if (ret) {
3691         return 1;
3692     }
3693     return 0;
3694 }
3695 
3696 typedef struct BenchData {
3697     BlockBackend *blk;
3698     uint64_t image_size;
3699     bool write;
3700     int bufsize;
3701     int step;
3702     int nrreq;
3703     int n;
3704     int flush_interval;
3705     bool drain_on_flush;
3706     uint8_t *buf;
3707     QEMUIOVector *qiov;
3708 
3709     int in_flight;
3710     bool in_flush;
3711     uint64_t offset;
3712 } BenchData;
3713 
/*
 * Completion callback for flushes issued without draining the queue.
 * A successful flush needs no bookkeeping; a failed one terminates the
 * process.  @opaque is not used.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
3721 
3722 static void bench_cb(void *opaque, int ret)
3723 {
3724     BenchData *b = opaque;
3725     BlockAIOCB *acb;
3726 
3727     if (ret < 0) {
3728         error_report("Failed request: %s", strerror(-ret));
3729         exit(EXIT_FAILURE);
3730     }
3731 
3732     if (b->in_flush) {
3733         /* Just finished a flush with drained queue: Start next requests */
3734         assert(b->in_flight == 0);
3735         b->in_flush = false;
3736     } else if (b->in_flight > 0) {
3737         int remaining = b->n - b->in_flight;
3738 
3739         b->n--;
3740         b->in_flight--;
3741 
3742         /* Time for flush? Drain queue if requested, then flush */
3743         if (b->flush_interval && remaining % b->flush_interval == 0) {
3744             if (!b->in_flight || !b->drain_on_flush) {
3745                 BlockCompletionFunc *cb;
3746 
3747                 if (b->drain_on_flush) {
3748                     b->in_flush = true;
3749                     cb = bench_cb;
3750                 } else {
3751                     cb = bench_undrained_flush_cb;
3752                 }
3753 
3754                 acb = blk_aio_flush(b->blk, cb, b);
3755                 if (!acb) {
3756                     error_report("Failed to issue flush request");
3757                     exit(EXIT_FAILURE);
3758                 }
3759             }
3760             if (b->drain_on_flush) {
3761                 return;
3762             }
3763         }
3764     }
3765 
3766     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
3767         int64_t offset = b->offset;
3768         /* blk_aio_* might look for completed I/Os and kick bench_cb
3769          * again, so make sure this operation is counted by in_flight
3770          * and b->offset is ready for the next submission.
3771          */
3772         b->in_flight++;
3773         b->offset += b->step;
3774         b->offset %= b->image_size;
3775         if (b->write) {
3776             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
3777         } else {
3778             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
3779         }
3780         if (!acb) {
3781             error_report("Failed to issue request");
3782             exit(EXIT_FAILURE);
3783         }
3784     }
3785 }
3786 
3787 static int img_bench(int argc, char **argv)
3788 {
3789     int c, ret = 0;
3790     const char *fmt = NULL, *filename;
3791     bool quiet = false;
3792     bool image_opts = false;
3793     bool is_write = false;
3794     int count = 75000;
3795     int depth = 64;
3796     int64_t offset = 0;
3797     size_t bufsize = 4096;
3798     int pattern = 0;
3799     size_t step = 0;
3800     int flush_interval = 0;
3801     bool drain_on_flush = true;
3802     int64_t image_size;
3803     BlockBackend *blk = NULL;
3804     BenchData data = {};
3805     int flags = 0;
3806     bool writethrough = false;
3807     struct timeval t1, t2;
3808     int i;
3809     bool force_share = false;
3810 
3811     for (;;) {
3812         static const struct option long_options[] = {
3813             {"help", no_argument, 0, 'h'},
3814             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
3815             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3816             {"pattern", required_argument, 0, OPTION_PATTERN},
3817             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
3818             {"force-share", no_argument, 0, 'U'},
3819             {0, 0, 0, 0}
3820         };
3821         c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:wU", long_options, NULL);
3822         if (c == -1) {
3823             break;
3824         }
3825 
3826         switch (c) {
3827         case ':':
3828             missing_argument(argv[optind - 1]);
3829             break;
3830         case '?':
3831             unrecognized_option(argv[optind - 1]);
3832             break;
3833         case 'h':
3834             help();
3835             break;
3836         case 'c':
3837         {
3838             unsigned long res;
3839 
3840             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
3841                 error_report("Invalid request count specified");
3842                 return 1;
3843             }
3844             count = res;
3845             break;
3846         }
3847         case 'd':
3848         {
3849             unsigned long res;
3850 
3851             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
3852                 error_report("Invalid queue depth specified");
3853                 return 1;
3854             }
3855             depth = res;
3856             break;
3857         }
3858         case 'f':
3859             fmt = optarg;
3860             break;
3861         case 'n':
3862             flags |= BDRV_O_NATIVE_AIO;
3863             break;
3864         case 'o':
3865         {
3866             offset = cvtnum(optarg);
3867             if (offset < 0) {
3868                 error_report("Invalid offset specified");
3869                 return 1;
3870             }
3871             break;
3872         }
3873             break;
3874         case 'q':
3875             quiet = true;
3876             break;
3877         case 's':
3878         {
3879             int64_t sval;
3880 
3881             sval = cvtnum(optarg);
3882             if (sval < 0 || sval > INT_MAX) {
3883                 error_report("Invalid buffer size specified");
3884                 return 1;
3885             }
3886 
3887             bufsize = sval;
3888             break;
3889         }
3890         case 'S':
3891         {
3892             int64_t sval;
3893 
3894             sval = cvtnum(optarg);
3895             if (sval < 0 || sval > INT_MAX) {
3896                 error_report("Invalid step size specified");
3897                 return 1;
3898             }
3899 
3900             step = sval;
3901             break;
3902         }
3903         case 't':
3904             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
3905             if (ret < 0) {
3906                 error_report("Invalid cache mode");
3907                 ret = -1;
3908                 goto out;
3909             }
3910             break;
3911         case 'w':
3912             flags |= BDRV_O_RDWR;
3913             is_write = true;
3914             break;
3915         case 'U':
3916             force_share = true;
3917             break;
3918         case OPTION_PATTERN:
3919         {
3920             unsigned long res;
3921 
3922             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
3923                 error_report("Invalid pattern byte specified");
3924                 return 1;
3925             }
3926             pattern = res;
3927             break;
3928         }
3929         case OPTION_FLUSH_INTERVAL:
3930         {
3931             unsigned long res;
3932 
3933             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
3934                 error_report("Invalid flush interval specified");
3935                 return 1;
3936             }
3937             flush_interval = res;
3938             break;
3939         }
3940         case OPTION_NO_DRAIN:
3941             drain_on_flush = false;
3942             break;
3943         case OPTION_IMAGE_OPTS:
3944             image_opts = true;
3945             break;
3946         }
3947     }
3948 
3949     if (optind != argc - 1) {
3950         error_exit("Expecting one image file name");
3951     }
3952     filename = argv[argc - 1];
3953 
3954     if (!is_write && flush_interval) {
3955         error_report("--flush-interval is only available in write tests");
3956         ret = -1;
3957         goto out;
3958     }
3959     if (flush_interval && flush_interval < depth) {
3960         error_report("Flush interval can't be smaller than depth");
3961         ret = -1;
3962         goto out;
3963     }
3964 
3965     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3966                    force_share);
3967     if (!blk) {
3968         ret = -1;
3969         goto out;
3970     }
3971 
3972     image_size = blk_getlength(blk);
3973     if (image_size < 0) {
3974         ret = image_size;
3975         goto out;
3976     }
3977 
3978     data = (BenchData) {
3979         .blk            = blk,
3980         .image_size     = image_size,
3981         .bufsize        = bufsize,
3982         .step           = step ?: bufsize,
3983         .nrreq          = depth,
3984         .n              = count,
3985         .offset         = offset,
3986         .write          = is_write,
3987         .flush_interval = flush_interval,
3988         .drain_on_flush = drain_on_flush,
3989     };
3990     printf("Sending %d %s requests, %d bytes each, %d in parallel "
3991            "(starting at offset %" PRId64 ", step size %d)\n",
3992            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
3993            data.offset, data.step);
3994     if (flush_interval) {
3995         printf("Sending flush every %d requests\n", flush_interval);
3996     }
3997 
3998     data.buf = blk_blockalign(blk, data.nrreq * data.bufsize);
3999     memset(data.buf, pattern, data.nrreq * data.bufsize);
4000 
4001     data.qiov = g_new(QEMUIOVector, data.nrreq);
4002     for (i = 0; i < data.nrreq; i++) {
4003         qemu_iovec_init(&data.qiov[i], 1);
4004         qemu_iovec_add(&data.qiov[i],
4005                        data.buf + i * data.bufsize, data.bufsize);
4006     }
4007 
4008     gettimeofday(&t1, NULL);
4009     bench_cb(&data, 0);
4010 
4011     while (data.n > 0) {
4012         main_loop_wait(false);
4013     }
4014     gettimeofday(&t2, NULL);
4015 
4016     printf("Run completed in %3.3f seconds.\n",
4017            (t2.tv_sec - t1.tv_sec)
4018            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4019 
4020 out:
4021     qemu_vfree(data.buf);
4022     blk_unref(blk);
4023 
4024     if (ret) {
4025         return 1;
4026     }
4027     return 0;
4028 }
4029 
/* Bits of DdInfo.flags: one per dd operand that was given */
#define C_BS      0x01
#define C_COUNT   0x02
#define C_IF      0x04
#define C_OF      0x08
#define C_SKIP    0x10
4035 
/* Operand state of a dd invocation that belongs to neither file */
struct DdInfo {
    unsigned flags;     /* C_* bits of the operands that were specified */
    int64_t count;      /* value of the count= operand */
};
4040 
/* One side (input or output) of a dd transfer */
struct DdIo {
    int bsz;            /* block size in bytes */
    char *filename;     /* heap-allocated file name, freed by img_dd() */
    uint8_t *buf;       /* transfer buffer, freed by img_dd() */
    int64_t offset;     /* value of skip= (img_dd() multiplies it by bsz) */
};
4047 
/* Entry of the dd operand table: "name=value" is parsed by f(value, ...) */
struct DdOpts {
    /* operand name, the text before '=' */
    const char *name;
    /* parser for the text after '='; returns 0 on success */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    /* C_* bit recorded in DdInfo.flags once the operand has been parsed */
    unsigned flag;
};
4053 
4054 static int img_dd_bs(const char *arg,
4055                      struct DdIo *in, struct DdIo *out,
4056                      struct DdInfo *dd)
4057 {
4058     int64_t res;
4059 
4060     res = cvtnum(arg);
4061 
4062     if (res <= 0 || res > INT_MAX) {
4063         error_report("invalid number: '%s'", arg);
4064         return 1;
4065     }
4066     in->bsz = out->bsz = res;
4067 
4068     return 0;
4069 }
4070 
4071 static int img_dd_count(const char *arg,
4072                         struct DdIo *in, struct DdIo *out,
4073                         struct DdInfo *dd)
4074 {
4075     dd->count = cvtnum(arg);
4076 
4077     if (dd->count < 0) {
4078         error_report("invalid number: '%s'", arg);
4079         return 1;
4080     }
4081 
4082     return 0;
4083 }
4084 
4085 static int img_dd_if(const char *arg,
4086                      struct DdIo *in, struct DdIo *out,
4087                      struct DdInfo *dd)
4088 {
4089     in->filename = g_strdup(arg);
4090 
4091     return 0;
4092 }
4093 
4094 static int img_dd_of(const char *arg,
4095                      struct DdIo *in, struct DdIo *out,
4096                      struct DdInfo *dd)
4097 {
4098     out->filename = g_strdup(arg);
4099 
4100     return 0;
4101 }
4102 
4103 static int img_dd_skip(const char *arg,
4104                        struct DdIo *in, struct DdIo *out,
4105                        struct DdInfo *dd)
4106 {
4107     in->offset = cvtnum(arg);
4108 
4109     if (in->offset < 0) {
4110         error_report("invalid number: '%s'", arg);
4111         return 1;
4112     }
4113 
4114     return 0;
4115 }
4116 
4117 static int img_dd(int argc, char **argv)
4118 {
4119     int ret = 0;
4120     char *arg = NULL;
4121     char *tmp;
4122     BlockDriver *drv = NULL, *proto_drv = NULL;
4123     BlockBackend *blk1 = NULL, *blk2 = NULL;
4124     QemuOpts *opts = NULL;
4125     QemuOptsList *create_opts = NULL;
4126     Error *local_err = NULL;
4127     bool image_opts = false;
4128     int c, i;
4129     const char *out_fmt = "raw";
4130     const char *fmt = NULL;
4131     int64_t size = 0;
4132     int64_t block_count = 0, out_pos, in_pos;
4133     bool force_share = false;
4134     struct DdInfo dd = {
4135         .flags = 0,
4136         .count = 0,
4137     };
4138     struct DdIo in = {
4139         .bsz = 512, /* Block size is by default 512 bytes */
4140         .filename = NULL,
4141         .buf = NULL,
4142         .offset = 0
4143     };
4144     struct DdIo out = {
4145         .bsz = 512,
4146         .filename = NULL,
4147         .buf = NULL,
4148         .offset = 0
4149     };
4150 
4151     const struct DdOpts options[] = {
4152         { "bs", img_dd_bs, C_BS },
4153         { "count", img_dd_count, C_COUNT },
4154         { "if", img_dd_if, C_IF },
4155         { "of", img_dd_of, C_OF },
4156         { "skip", img_dd_skip, C_SKIP },
4157         { NULL, NULL, 0 }
4158     };
4159     const struct option long_options[] = {
4160         { "help", no_argument, 0, 'h'},
4161         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4162         { "force-share", no_argument, 0, 'U'},
4163         { 0, 0, 0, 0 }
4164     };
4165 
4166     while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
4167         if (c == EOF) {
4168             break;
4169         }
4170         switch (c) {
4171         case 'O':
4172             out_fmt = optarg;
4173             break;
4174         case 'f':
4175             fmt = optarg;
4176             break;
4177         case ':':
4178             missing_argument(argv[optind - 1]);
4179             break;
4180         case '?':
4181             unrecognized_option(argv[optind - 1]);
4182             break;
4183         case 'h':
4184             help();
4185             break;
4186         case 'U':
4187             force_share = true;
4188             break;
4189         case OPTION_IMAGE_OPTS:
4190             image_opts = true;
4191             break;
4192         }
4193     }
4194 
4195     for (i = optind; i < argc; i++) {
4196         int j;
4197         arg = g_strdup(argv[i]);
4198 
4199         tmp = strchr(arg, '=');
4200         if (tmp == NULL) {
4201             error_report("unrecognized operand %s", arg);
4202             ret = -1;
4203             goto out;
4204         }
4205 
4206         *tmp++ = '\0';
4207 
4208         for (j = 0; options[j].name != NULL; j++) {
4209             if (!strcmp(arg, options[j].name)) {
4210                 break;
4211             }
4212         }
4213         if (options[j].name == NULL) {
4214             error_report("unrecognized operand %s", arg);
4215             ret = -1;
4216             goto out;
4217         }
4218 
4219         if (options[j].f(tmp, &in, &out, &dd) != 0) {
4220             ret = -1;
4221             goto out;
4222         }
4223         dd.flags |= options[j].flag;
4224         g_free(arg);
4225         arg = NULL;
4226     }
4227 
4228     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
4229         error_report("Must specify both input and output files");
4230         ret = -1;
4231         goto out;
4232     }
4233     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
4234                     force_share);
4235 
4236     if (!blk1) {
4237         ret = -1;
4238         goto out;
4239     }
4240 
4241     drv = bdrv_find_format(out_fmt);
4242     if (!drv) {
4243         error_report("Unknown file format");
4244         ret = -1;
4245         goto out;
4246     }
4247     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
4248 
4249     if (!proto_drv) {
4250         error_report_err(local_err);
4251         ret = -1;
4252         goto out;
4253     }
4254     if (!drv->create_opts) {
4255         error_report("Format driver '%s' does not support image creation",
4256                      drv->format_name);
4257         ret = -1;
4258         goto out;
4259     }
4260     if (!proto_drv->create_opts) {
4261         error_report("Protocol driver '%s' does not support image creation",
4262                      proto_drv->format_name);
4263         ret = -1;
4264         goto out;
4265     }
4266     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4267     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
4268 
4269     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4270 
4271     size = blk_getlength(blk1);
4272     if (size < 0) {
4273         error_report("Failed to get size for '%s'", in.filename);
4274         ret = -1;
4275         goto out;
4276     }
4277 
4278     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
4279         dd.count * in.bsz < size) {
4280         size = dd.count * in.bsz;
4281     }
4282 
4283     /* Overflow means the specified offset is beyond input image's size */
4284     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4285                               size < in.bsz * in.offset)) {
4286         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
4287     } else {
4288         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
4289                             size - in.bsz * in.offset, &error_abort);
4290     }
4291 
4292     ret = bdrv_create(drv, out.filename, opts, &local_err);
4293     if (ret < 0) {
4294         error_reportf_err(local_err,
4295                           "%s: error while creating output image: ",
4296                           out.filename);
4297         ret = -1;
4298         goto out;
4299     }
4300 
4301     blk2 = img_open(image_opts, out.filename, out_fmt, BDRV_O_RDWR,
4302                     false, false, false);
4303 
4304     if (!blk2) {
4305         ret = -1;
4306         goto out;
4307     }
4308 
4309     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4310                               size < in.offset * in.bsz)) {
4311         /* We give a warning if the skip option is bigger than the input
4312          * size and create an empty output disk image (i.e. like dd(1)).
4313          */
4314         error_report("%s: cannot skip to specified offset", in.filename);
4315         in_pos = size;
4316     } else {
4317         in_pos = in.offset * in.bsz;
4318     }
4319 
4320     in.buf = g_new(uint8_t, in.bsz);
4321 
4322     for (out_pos = 0; in_pos < size; block_count++) {
4323         int in_ret, out_ret;
4324 
4325         if (in_pos + in.bsz > size) {
4326             in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
4327         } else {
4328             in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
4329         }
4330         if (in_ret < 0) {
4331             error_report("error while reading from input image file: %s",
4332                          strerror(-in_ret));
4333             ret = -1;
4334             goto out;
4335         }
4336         in_pos += in_ret;
4337 
4338         out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
4339 
4340         if (out_ret < 0) {
4341             error_report("error while writing to output image file: %s",
4342                          strerror(-out_ret));
4343             ret = -1;
4344             goto out;
4345         }
4346         out_pos += out_ret;
4347     }
4348 
4349 out:
4350     g_free(arg);
4351     qemu_opts_del(opts);
4352     qemu_opts_free(create_opts);
4353     blk_unref(blk1);
4354     blk_unref(blk2);
4355     g_free(in.filename);
4356     g_free(out.filename);
4357     g_free(in.buf);
4358     g_free(out.buf);
4359 
4360     if (ret) {
4361         return 1;
4362     }
4363     return 0;
4364 }
4365 
4366 
4367 static const img_cmd_t img_cmds[] = {
4368 #define DEF(option, callback, arg_string)        \
4369     { option, callback },
4370 #include "qemu-img-cmds.h"
4371 #undef DEF
4372 #undef GEN_DOCS
4373     { NULL, NULL, },
4374 };
4375 
4376 int main(int argc, char **argv)
4377 {
4378     const img_cmd_t *cmd;
4379     const char *cmdname;
4380     Error *local_error = NULL;
4381     char *trace_file = NULL;
4382     int c;
4383     static const struct option long_options[] = {
4384         {"help", no_argument, 0, 'h'},
4385         {"version", no_argument, 0, 'V'},
4386         {"trace", required_argument, NULL, 'T'},
4387         {0, 0, 0, 0}
4388     };
4389 
4390 #ifdef CONFIG_POSIX
4391     signal(SIGPIPE, SIG_IGN);
4392 #endif
4393 
4394     module_call_init(MODULE_INIT_TRACE);
4395     error_set_progname(argv[0]);
4396     qemu_init_exec_dir(argv[0]);
4397 
4398     if (qemu_init_main_loop(&local_error)) {
4399         error_report_err(local_error);
4400         exit(EXIT_FAILURE);
4401     }
4402 
4403     qcrypto_init(&error_fatal);
4404 
4405     module_call_init(MODULE_INIT_QOM);
4406     bdrv_init();
4407     if (argc < 2) {
4408         error_exit("Not enough arguments");
4409     }
4410 
4411     qemu_add_opts(&qemu_object_opts);
4412     qemu_add_opts(&qemu_source_opts);
4413     qemu_add_opts(&qemu_trace_opts);
4414 
4415     while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
4416         switch (c) {
4417         case ':':
4418             missing_argument(argv[optind - 1]);
4419             return 0;
4420         case '?':
4421             unrecognized_option(argv[optind - 1]);
4422             return 0;
4423         case 'h':
4424             help();
4425             return 0;
4426         case 'V':
4427             printf(QEMU_IMG_VERSION);
4428             return 0;
4429         case 'T':
4430             g_free(trace_file);
4431             trace_file = trace_opt_parse(optarg);
4432             break;
4433         }
4434     }
4435 
4436     cmdname = argv[optind];
4437 
4438     /* reset getopt_long scanning */
4439     argc -= optind;
4440     if (argc < 1) {
4441         return 0;
4442     }
4443     argv += optind;
4444     optind = 0;
4445 
4446     if (!trace_init_backends()) {
4447         exit(1);
4448     }
4449     trace_init_file(trace_file);
4450     qemu_set_log(LOG_TRACE);
4451 
4452     /* find the command */
4453     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
4454         if (!strcmp(cmdname, cmd->name)) {
4455             return cmd->handler(argc, argv);
4456         }
4457     }
4458 
4459     /* not found */
4460     error_exit("Command not found: %s", cmdname);
4461 }
4462