xref: /openbmc/qemu/qemu-img.c (revision 0ec7b3e7)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 #include "qemu/osdep.h"
25 #include "qemu-version.h"
26 #include "qapi/error.h"
27 #include "qapi-visit.h"
28 #include "qapi/qobject-output-visitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "qapi/qmp/qjson.h"
31 #include "qemu/cutils.h"
32 #include "qemu/config-file.h"
33 #include "qemu/option.h"
34 #include "qemu/error-report.h"
35 #include "qemu/log.h"
36 #include "qom/object_interfaces.h"
37 #include "sysemu/sysemu.h"
38 #include "sysemu/block-backend.h"
39 #include "block/block_int.h"
40 #include "block/blockjob.h"
41 #include "block/qapi.h"
42 #include "crypto/init.h"
43 #include "trace/control.h"
44 #include <getopt.h>
45 
/* Version banner placed at the very top of the help text: the program name
 * followed by QEMU_VERSION and QEMU_PKGVERSION on one line, then the
 * QEMU_COPYRIGHT line, each line terminated by a newline. */
#define QEMU_IMG_VERSION "qemu-img version " QEMU_VERSION QEMU_PKGVERSION \
                          "\n" QEMU_COPYRIGHT "\n"
48 
/* Describes one qemu-img subcommand: the name used on the command line and
 * the function implementing it. */
typedef struct img_cmd_t {
    /* subcommand name */
    const char *name;
    /* implementation; receives an argc/argv pair and returns an int status */
    int (*handler)(int argc, char **argv);
} img_cmd_t;
53 
/*
 * getopt_long() return codes for options that only have a long form.
 * Numbering starts at 256 so that the codes can never collide with the
 * single-character codes ('h', 'f', ...) handled in the same switch
 * statements; the remaining enumerators simply count upwards from there.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN,       /* 257 */
    OPTION_OBJECT,              /* 258 */
    OPTION_IMAGE_OPTS,          /* 259 */
    OPTION_PATTERN,             /* 260 */
    OPTION_FLUSH_INTERVAL,      /* 261 */
    OPTION_NO_DRAIN,            /* 262 */
};
63 
/* Report style selected with --output: machine-readable JSON or the
 * human-readable text form. */
typedef enum OutputFormat {
    OFORMAT_JSON,
    OFORMAT_HUMAN,
} OutputFormat;
68 
/* Default to cache=writeback as data integrity is not important for qemu-img.
 * Subcommands initialise their cache mode string with this value and let the
 * user override it with a command-line option. */
#define BDRV_DEFAULT_CACHE "writeback"
71 
/*
 * Iteration callback used by help(): prints one format name preceded by a
 * single space so that all names end up on one line.
 *
 * @opaque: unused callback context
 * @name:   format name to print
 */
static void format_print(void *opaque, const char *name)
{
    (void)opaque;   /* no per-call context is needed */

    printf(" %s", name);
}
76 
77 static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
78 {
79     va_list ap;
80 
81     error_printf("qemu-img: ");
82 
83     va_start(ap, fmt);
84     error_vprintf(fmt, ap);
85     va_end(ap);
86 
87     error_printf("\nTry 'qemu-img --help' for more information\n");
88     exit(EXIT_FAILURE);
89 }
90 
/*
 * Print the complete usage text followed by the list of supported image
 * formats on stdout, then exit with EXIT_SUCCESS. Never returns.
 *
 * Please keep in sync with qemu-img.texi
 */
static void QEMU_NORETURN help(void)
{
    const char *help_msg =
           QEMU_IMG_VERSION
           "usage: qemu-img [standard options] command [command options]\n"
           "QEMU disk image utility\n"
           "\n"
           "    '-h', '--help'       display this help and exit\n"
           "    '-V', '--version'    output version information and exit\n"
           "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
           "                         specify tracing options\n"
           "\n"
           "Command syntax:\n"
/* Every DEF() entry pulled in from qemu-img-cmds.h expands to its usage
 * string as one indented line of this literal. */
#define DEF(option, callback, arg_string)        \
           "  " arg_string "\n"
#include "qemu-img-cmds.h"
#undef DEF
#undef GEN_DOCS
           "\n"
           "Command parameters:\n"
           "  'filename' is a disk image filename\n"
           "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
           "    manual page for a description of the object properties. The most common\n"
           "    object type is a 'secret', which is used to supply passwords and/or\n"
           "    encryption keys.\n"
           "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
           "  'cache' is the cache mode used to write the output disk image, the valid\n"
           "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
           "    'directsync' and 'unsafe' (default for convert)\n"
           "  'src_cache' is the cache mode used to read input disk images, the valid\n"
           "    options are the same as for the 'cache' option\n"
           "  'size' is the disk image size in bytes. Optional suffixes\n"
           "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
           "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
           "    supported. 'b' is ignored.\n"
           "  'output_filename' is the destination disk image filename\n"
           "  'output_fmt' is the destination format\n"
           "  'options' is a comma separated list of format specific options in a\n"
           "    name=value format. Use -o ? for an overview of the options supported by the\n"
           "    used format\n"
           "  'snapshot_param' is param used for internal snapshot, format\n"
           "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
           "    '[ID_OR_NAME]'\n"
           "  'snapshot_id_or_name' is deprecated, use 'snapshot_param'\n"
           "    instead\n"
           "  '-c' indicates that target image must be compressed (qcow format only)\n"
           "  '-u' enables unsafe rebasing. It is assumed that old and new backing file\n"
           "       match exactly. The image doesn't need a working backing file before\n"
           "       rebasing in this case (useful for renaming the backing file)\n"
           "  '-h' with or without a command shows this help and lists the supported formats\n"
           "  '-p' show progress of command (only certain commands)\n"
           "  '-q' use Quiet mode - do not print any output (except errors)\n"
           "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
           "       contain only zeros for qemu-img to create a sparse image during\n"
           "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
           "       unallocated or zero sectors, and the destination image will always be\n"
           "       fully allocated\n"
           "  '--output' takes the format in which the output must be done (human or json)\n"
           "  '-n' skips the target volume creation (useful if the volume is created\n"
           "       prior to running qemu-img)\n"
           "\n"
           "Parameters to check subcommand:\n"
           "  '-r' tries to repair any inconsistencies that are found during the check.\n"
           "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
           "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
           "       hiding corruption that has already occurred.\n"
           "\n"
           "Parameters to snapshot subcommand:\n"
           "  'snapshot' is the name of the snapshot to create, apply or delete\n"
           "  '-a' applies a snapshot (revert disk to saved state)\n"
           "  '-c' creates a snapshot\n"
           "  '-d' deletes a snapshot\n"
           "  '-l' lists all snapshots in the given image\n"
           "\n"
           "Parameters to compare subcommand:\n"
           "  '-f' first image format\n"
           "  '-F' second image format\n"
           "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
           "\n"
           "Parameters to dd subcommand:\n"
           "  'bs=BYTES' read and write up to BYTES bytes at a time "
           "(default: 512)\n"
           "  'count=N' copy only N input blocks\n"
           "  'if=FILE' read from FILE\n"
           "  'of=FILE' write to FILE\n"
           "  'skip=N' skip N bs-sized blocks at the start of input\n";

    /* The format names are appended on the same line by format_print(). */
    printf("%s\nSupported formats:", help_msg);
    bdrv_iterate_format(format_print, NULL);
    printf("\n");
    exit(EXIT_SUCCESS);
}
184 
/* Option list that collects the argument of every --object switch; the
 * subcommands later walk it to create the requested objects. "qom-type" is
 * the implied name of a leading value given without a key.
 * NOTE(review): the descriptor table is empty, presumably so that arbitrary
 * per-type properties are accepted -- confirm against the QemuOpts API. */
static QemuOptsList qemu_object_opts = {
    .name = "object",
    .implied_opt_name = "qom-type",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
    .desc = {
        { }
    },
};
193 
/* Option list named "source"; "file" is the implied name of a leading value
 * given without a key.
 * NOTE(review): img_open() looks up a list called "source" through
 * qemu_find_opts() to parse --image-opts strings; presumably this is that
 * list and it is registered elsewhere in the file -- confirm. */
static QemuOptsList qemu_source_opts = {
    .name = "source",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
    .desc = {
        { }
    },
};
202 
203 static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
204 {
205     int ret = 0;
206     if (!quiet) {
207         va_list args;
208         va_start(args, fmt);
209         ret = vprintf(fmt, args);
210         va_end(args);
211     }
212     return ret;
213 }
214 
215 
216 static int print_block_option_help(const char *filename, const char *fmt)
217 {
218     BlockDriver *drv, *proto_drv;
219     QemuOptsList *create_opts = NULL;
220     Error *local_err = NULL;
221 
222     /* Find driver and parse its options */
223     drv = bdrv_find_format(fmt);
224     if (!drv) {
225         error_report("Unknown file format '%s'", fmt);
226         return 1;
227     }
228 
229     create_opts = qemu_opts_append(create_opts, drv->create_opts);
230     if (filename) {
231         proto_drv = bdrv_find_protocol(filename, true, &local_err);
232         if (!proto_drv) {
233             error_report_err(local_err);
234             qemu_opts_free(create_opts);
235             return 1;
236         }
237         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
238     }
239 
240     qemu_opts_print_help(create_opts);
241     qemu_opts_free(create_opts);
242     return 0;
243 }
244 
245 
246 static int img_open_password(BlockBackend *blk, const char *filename,
247                              int flags, bool quiet)
248 {
249     BlockDriverState *bs;
250     char password[256];
251 
252     bs = blk_bs(blk);
253     if (bdrv_is_encrypted(bs) && bdrv_key_required(bs) &&
254         !(flags & BDRV_O_NO_IO)) {
255         qprintf(quiet, "Disk image '%s' is encrypted.\n", filename);
256         if (qemu_read_password(password, sizeof(password)) < 0) {
257             error_report("No password given");
258             return -1;
259         }
260         if (bdrv_set_key(bs, password) < 0) {
261             error_report("invalid password");
262             return -1;
263         }
264     }
265     return 0;
266 }
267 
268 
269 static BlockBackend *img_open_opts(const char *optstr,
270                                    QemuOpts *opts, int flags, bool writethrough,
271                                    bool quiet)
272 {
273     QDict *options;
274     Error *local_err = NULL;
275     BlockBackend *blk;
276     options = qemu_opts_to_qdict(opts, NULL);
277     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
278     if (!blk) {
279         error_reportf_err(local_err, "Could not open '%s': ", optstr);
280         return NULL;
281     }
282     blk_set_enable_write_cache(blk, !writethrough);
283 
284     if (img_open_password(blk, optstr, flags, quiet) < 0) {
285         blk_unref(blk);
286         return NULL;
287     }
288     return blk;
289 }
290 
291 static BlockBackend *img_open_file(const char *filename,
292                                    const char *fmt, int flags,
293                                    bool writethrough, bool quiet)
294 {
295     BlockBackend *blk;
296     Error *local_err = NULL;
297     QDict *options = NULL;
298 
299     if (fmt) {
300         options = qdict_new();
301         qdict_put(options, "driver", qstring_from_str(fmt));
302     }
303 
304     blk = blk_new_open(filename, NULL, options, flags, &local_err);
305     if (!blk) {
306         error_reportf_err(local_err, "Could not open '%s': ", filename);
307         return NULL;
308     }
309     blk_set_enable_write_cache(blk, !writethrough);
310 
311     if (img_open_password(blk, filename, flags, quiet) < 0) {
312         blk_unref(blk);
313         return NULL;
314     }
315     return blk;
316 }
317 
318 
319 static BlockBackend *img_open(bool image_opts,
320                               const char *filename,
321                               const char *fmt, int flags, bool writethrough,
322                               bool quiet)
323 {
324     BlockBackend *blk;
325     if (image_opts) {
326         QemuOpts *opts;
327         if (fmt) {
328             error_report("--image-opts and --format are mutually exclusive");
329             return NULL;
330         }
331         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
332                                        filename, true);
333         if (!opts) {
334             return NULL;
335         }
336         blk = img_open_opts(filename, opts, flags, writethrough, quiet);
337     } else {
338         blk = img_open_file(filename, fmt, flags, writethrough, quiet);
339     }
340     return blk;
341 }
342 
343 
344 static int add_old_style_options(const char *fmt, QemuOpts *opts,
345                                  const char *base_filename,
346                                  const char *base_fmt)
347 {
348     Error *err = NULL;
349 
350     if (base_filename) {
351         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
352         if (err) {
353             error_report("Backing file not supported for file format '%s'",
354                          fmt);
355             error_free(err);
356             return -1;
357         }
358     }
359     if (base_fmt) {
360         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
361         if (err) {
362             error_report("Backing file format not supported for file "
363                          "format '%s'", fmt);
364             error_free(err);
365             return -1;
366         }
367     }
368     return 0;
369 }
370 
371 static int img_create(int argc, char **argv)
372 {
373     int c;
374     uint64_t img_size = -1;
375     const char *fmt = "raw";
376     const char *base_fmt = NULL;
377     const char *filename;
378     const char *base_filename = NULL;
379     char *options = NULL;
380     Error *local_err = NULL;
381     bool quiet = false;
382 
383     for(;;) {
384         static const struct option long_options[] = {
385             {"help", no_argument, 0, 'h'},
386             {"object", required_argument, 0, OPTION_OBJECT},
387             {0, 0, 0, 0}
388         };
389         c = getopt_long(argc, argv, "F:b:f:he6o:q",
390                         long_options, NULL);
391         if (c == -1) {
392             break;
393         }
394         switch(c) {
395         case '?':
396         case 'h':
397             help();
398             break;
399         case 'F':
400             base_fmt = optarg;
401             break;
402         case 'b':
403             base_filename = optarg;
404             break;
405         case 'f':
406             fmt = optarg;
407             break;
408         case 'e':
409             error_report("option -e is deprecated, please use \'-o "
410                   "encryption\' instead!");
411             goto fail;
412         case '6':
413             error_report("option -6 is deprecated, please use \'-o "
414                   "compat6\' instead!");
415             goto fail;
416         case 'o':
417             if (!is_valid_option_list(optarg)) {
418                 error_report("Invalid option list: %s", optarg);
419                 goto fail;
420             }
421             if (!options) {
422                 options = g_strdup(optarg);
423             } else {
424                 char *old_options = options;
425                 options = g_strdup_printf("%s,%s", options, optarg);
426                 g_free(old_options);
427             }
428             break;
429         case 'q':
430             quiet = true;
431             break;
432         case OPTION_OBJECT: {
433             QemuOpts *opts;
434             opts = qemu_opts_parse_noisily(&qemu_object_opts,
435                                            optarg, true);
436             if (!opts) {
437                 goto fail;
438             }
439         }   break;
440         }
441     }
442 
443     /* Get the filename */
444     filename = (optind < argc) ? argv[optind] : NULL;
445     if (options && has_help_option(options)) {
446         g_free(options);
447         return print_block_option_help(filename, fmt);
448     }
449 
450     if (optind >= argc) {
451         error_exit("Expecting image file name");
452     }
453     optind++;
454 
455     if (qemu_opts_foreach(&qemu_object_opts,
456                           user_creatable_add_opts_foreach,
457                           NULL, NULL)) {
458         goto fail;
459     }
460 
461     /* Get image size, if specified */
462     if (optind < argc) {
463         int64_t sval;
464         char *end;
465         sval = qemu_strtosz_suffix(argv[optind++], &end,
466                                    QEMU_STRTOSZ_DEFSUFFIX_B);
467         if (sval < 0 || *end) {
468             if (sval == -ERANGE) {
469                 error_report("Image size must be less than 8 EiB!");
470             } else {
471                 error_report("Invalid image size specified! You may use k, M, "
472                       "G, T, P or E suffixes for ");
473                 error_report("kilobytes, megabytes, gigabytes, terabytes, "
474                              "petabytes and exabytes.");
475             }
476             goto fail;
477         }
478         img_size = (uint64_t)sval;
479     }
480     if (optind != argc) {
481         error_exit("Unexpected argument: %s", argv[optind]);
482     }
483 
484     bdrv_img_create(filename, fmt, base_filename, base_fmt,
485                     options, img_size, 0, &local_err, quiet);
486     if (local_err) {
487         error_reportf_err(local_err, "%s: ", filename);
488         goto fail;
489     }
490 
491     g_free(options);
492     return 0;
493 
494 fail:
495     g_free(options);
496     return 1;
497 }
498 
499 static void dump_json_image_check(ImageCheck *check, bool quiet)
500 {
501     QString *str;
502     QObject *obj;
503     Visitor *v = qobject_output_visitor_new(&obj);
504 
505     visit_type_ImageCheck(v, NULL, &check, &error_abort);
506     visit_complete(v, &obj);
507     str = qobject_to_json_pretty(obj);
508     assert(str != NULL);
509     qprintf(quiet, "%s\n", qstring_get_str(str));
510     qobject_decref(obj);
511     visit_free(v);
512     QDECREF(str);
513 }
514 
515 static void dump_human_image_check(ImageCheck *check, bool quiet)
516 {
517     if (!(check->corruptions || check->leaks || check->check_errors)) {
518         qprintf(quiet, "No errors were found on the image.\n");
519     } else {
520         if (check->corruptions) {
521             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
522                     "Data may be corrupted, or further writes to the image "
523                     "may corrupt it.\n",
524                     check->corruptions);
525         }
526 
527         if (check->leaks) {
528             qprintf(quiet,
529                     "\n%" PRId64 " leaked clusters were found on the image.\n"
530                     "This means waste of disk space, but no harm to data.\n",
531                     check->leaks);
532         }
533 
534         if (check->check_errors) {
535             qprintf(quiet,
536                     "\n%" PRId64
537                     " internal errors have occurred during the check.\n",
538                     check->check_errors);
539         }
540     }
541 
542     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
543         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
544                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
545                 check->allocated_clusters, check->total_clusters,
546                 check->allocated_clusters * 100.0 / check->total_clusters,
547                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
548                 check->compressed_clusters * 100.0 /
549                 check->allocated_clusters);
550     }
551 
552     if (check->image_end_offset) {
553         qprintf(quiet,
554                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
555     }
556 }
557 
558 static int collect_image_check(BlockDriverState *bs,
559                    ImageCheck *check,
560                    const char *filename,
561                    const char *fmt,
562                    int fix)
563 {
564     int ret;
565     BdrvCheckResult result;
566 
567     ret = bdrv_check(bs, &result, fix);
568     if (ret < 0) {
569         return ret;
570     }
571 
572     check->filename                 = g_strdup(filename);
573     check->format                   = g_strdup(bdrv_get_format_name(bs));
574     check->check_errors             = result.check_errors;
575     check->corruptions              = result.corruptions;
576     check->has_corruptions          = result.corruptions != 0;
577     check->leaks                    = result.leaks;
578     check->has_leaks                = result.leaks != 0;
579     check->corruptions_fixed        = result.corruptions_fixed;
580     check->has_corruptions_fixed    = result.corruptions != 0;
581     check->leaks_fixed              = result.leaks_fixed;
582     check->has_leaks_fixed          = result.leaks != 0;
583     check->image_end_offset         = result.image_end_offset;
584     check->has_image_end_offset     = result.image_end_offset != 0;
585     check->total_clusters           = result.bfi.total_clusters;
586     check->has_total_clusters       = result.bfi.total_clusters != 0;
587     check->allocated_clusters       = result.bfi.allocated_clusters;
588     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
589     check->fragmented_clusters      = result.bfi.fragmented_clusters;
590     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
591     check->compressed_clusters      = result.bfi.compressed_clusters;
592     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
593 
594     return 0;
595 }
596 
597 /*
598  * Checks an image for consistency. Exit codes:
599  *
600  *  0 - Check completed, image is good
601  *  1 - Check not completed because of internal errors
602  *  2 - Check completed, image is corrupted
603  *  3 - Check completed, image has leaked clusters, but is good otherwise
604  * 63 - Checks are not supported by the image format
605  */
606 static int img_check(int argc, char **argv)
607 {
608     int c, ret;
609     OutputFormat output_format = OFORMAT_HUMAN;
610     const char *filename, *fmt, *output, *cache;
611     BlockBackend *blk;
612     BlockDriverState *bs;
613     int fix = 0;
614     int flags = BDRV_O_CHECK;
615     bool writethrough;
616     ImageCheck *check;
617     bool quiet = false;
618     bool image_opts = false;
619 
620     fmt = NULL;
621     output = NULL;
622     cache = BDRV_DEFAULT_CACHE;
623 
624     for(;;) {
625         int option_index = 0;
626         static const struct option long_options[] = {
627             {"help", no_argument, 0, 'h'},
628             {"format", required_argument, 0, 'f'},
629             {"repair", required_argument, 0, 'r'},
630             {"output", required_argument, 0, OPTION_OUTPUT},
631             {"object", required_argument, 0, OPTION_OBJECT},
632             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
633             {0, 0, 0, 0}
634         };
635         c = getopt_long(argc, argv, "hf:r:T:q",
636                         long_options, &option_index);
637         if (c == -1) {
638             break;
639         }
640         switch(c) {
641         case '?':
642         case 'h':
643             help();
644             break;
645         case 'f':
646             fmt = optarg;
647             break;
648         case 'r':
649             flags |= BDRV_O_RDWR;
650 
651             if (!strcmp(optarg, "leaks")) {
652                 fix = BDRV_FIX_LEAKS;
653             } else if (!strcmp(optarg, "all")) {
654                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
655             } else {
656                 error_exit("Unknown option value for -r "
657                            "(expecting 'leaks' or 'all'): %s", optarg);
658             }
659             break;
660         case OPTION_OUTPUT:
661             output = optarg;
662             break;
663         case 'T':
664             cache = optarg;
665             break;
666         case 'q':
667             quiet = true;
668             break;
669         case OPTION_OBJECT: {
670             QemuOpts *opts;
671             opts = qemu_opts_parse_noisily(&qemu_object_opts,
672                                            optarg, true);
673             if (!opts) {
674                 return 1;
675             }
676         }   break;
677         case OPTION_IMAGE_OPTS:
678             image_opts = true;
679             break;
680         }
681     }
682     if (optind != argc - 1) {
683         error_exit("Expecting one image file name");
684     }
685     filename = argv[optind++];
686 
687     if (output && !strcmp(output, "json")) {
688         output_format = OFORMAT_JSON;
689     } else if (output && !strcmp(output, "human")) {
690         output_format = OFORMAT_HUMAN;
691     } else if (output) {
692         error_report("--output must be used with human or json as argument.");
693         return 1;
694     }
695 
696     if (qemu_opts_foreach(&qemu_object_opts,
697                           user_creatable_add_opts_foreach,
698                           NULL, NULL)) {
699         return 1;
700     }
701 
702     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
703     if (ret < 0) {
704         error_report("Invalid source cache option: %s", cache);
705         return 1;
706     }
707 
708     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
709     if (!blk) {
710         return 1;
711     }
712     bs = blk_bs(blk);
713 
714     check = g_new0(ImageCheck, 1);
715     ret = collect_image_check(bs, check, filename, fmt, fix);
716 
717     if (ret == -ENOTSUP) {
718         error_report("This image format does not support checks");
719         ret = 63;
720         goto fail;
721     }
722 
723     if (check->corruptions_fixed || check->leaks_fixed) {
724         int corruptions_fixed, leaks_fixed;
725 
726         leaks_fixed         = check->leaks_fixed;
727         corruptions_fixed   = check->corruptions_fixed;
728 
729         if (output_format == OFORMAT_HUMAN) {
730             qprintf(quiet,
731                     "The following inconsistencies were found and repaired:\n\n"
732                     "    %" PRId64 " leaked clusters\n"
733                     "    %" PRId64 " corruptions\n\n"
734                     "Double checking the fixed image now...\n",
735                     check->leaks_fixed,
736                     check->corruptions_fixed);
737         }
738 
739         ret = collect_image_check(bs, check, filename, fmt, 0);
740 
741         check->leaks_fixed          = leaks_fixed;
742         check->corruptions_fixed    = corruptions_fixed;
743     }
744 
745     if (!ret) {
746         switch (output_format) {
747         case OFORMAT_HUMAN:
748             dump_human_image_check(check, quiet);
749             break;
750         case OFORMAT_JSON:
751             dump_json_image_check(check, quiet);
752             break;
753         }
754     }
755 
756     if (ret || check->check_errors) {
757         if (ret) {
758             error_report("Check failed: %s", strerror(-ret));
759         } else {
760             error_report("Check failed");
761         }
762         ret = 1;
763         goto fail;
764     }
765 
766     if (check->corruptions) {
767         ret = 2;
768     } else if (check->leaks) {
769         ret = 3;
770     } else {
771         ret = 0;
772     }
773 
774 fail:
775     qapi_free_ImageCheck(check);
776     blk_unref(blk);
777     return ret;
778 }
779 
/* Context handed to common_block_job_cb() as its opaque pointer. */
typedef struct CommonBlockJobCBInfo {
    /* block node the job was started on (not read by the callback itself) */
    BlockDriverState *bs;
    /* where the callback stores an error if the job fails */
    Error **errp;
} CommonBlockJobCBInfo;
784 
785 static void common_block_job_cb(void *opaque, int ret)
786 {
787     CommonBlockJobCBInfo *cbi = opaque;
788 
789     if (ret < 0) {
790         error_setg_errno(cbi->errp, -ret, "Block job failed");
791     }
792 }
793 
794 static void run_block_job(BlockJob *job, Error **errp)
795 {
796     AioContext *aio_context = blk_get_aio_context(job->blk);
797 
798     aio_context_acquire(aio_context);
799     do {
800         aio_poll(aio_context, true);
801         qemu_progress_print(job->len ?
802                             ((float)job->offset / job->len * 100.f) : 0.0f, 0);
803     } while (!job->ready);
804 
805     block_job_complete_sync(job, errp);
806     aio_context_release(aio_context);
807 
808     /* A block job may finish instantaneously without publishing any progress,
809      * so just signal completion here */
810     qemu_progress_print(100.f, 0);
811 }
812 
813 static int img_commit(int argc, char **argv)
814 {
815     int c, ret, flags;
816     const char *filename, *fmt, *cache, *base;
817     BlockBackend *blk;
818     BlockDriverState *bs, *base_bs;
819     bool progress = false, quiet = false, drop = false;
820     bool writethrough;
821     Error *local_err = NULL;
822     CommonBlockJobCBInfo cbi;
823     bool image_opts = false;
824     AioContext *aio_context;
825 
826     fmt = NULL;
827     cache = BDRV_DEFAULT_CACHE;
828     base = NULL;
829     for(;;) {
830         static const struct option long_options[] = {
831             {"help", no_argument, 0, 'h'},
832             {"object", required_argument, 0, OPTION_OBJECT},
833             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
834             {0, 0, 0, 0}
835         };
836         c = getopt_long(argc, argv, "f:ht:b:dpq",
837                         long_options, NULL);
838         if (c == -1) {
839             break;
840         }
841         switch(c) {
842         case '?':
843         case 'h':
844             help();
845             break;
846         case 'f':
847             fmt = optarg;
848             break;
849         case 't':
850             cache = optarg;
851             break;
852         case 'b':
853             base = optarg;
854             /* -b implies -d */
855             drop = true;
856             break;
857         case 'd':
858             drop = true;
859             break;
860         case 'p':
861             progress = true;
862             break;
863         case 'q':
864             quiet = true;
865             break;
866         case OPTION_OBJECT: {
867             QemuOpts *opts;
868             opts = qemu_opts_parse_noisily(&qemu_object_opts,
869                                            optarg, true);
870             if (!opts) {
871                 return 1;
872             }
873         }   break;
874         case OPTION_IMAGE_OPTS:
875             image_opts = true;
876             break;
877         }
878     }
879 
880     /* Progress is not shown in Quiet mode */
881     if (quiet) {
882         progress = false;
883     }
884 
885     if (optind != argc - 1) {
886         error_exit("Expecting one image file name");
887     }
888     filename = argv[optind++];
889 
890     if (qemu_opts_foreach(&qemu_object_opts,
891                           user_creatable_add_opts_foreach,
892                           NULL, NULL)) {
893         return 1;
894     }
895 
896     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
897     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
898     if (ret < 0) {
899         error_report("Invalid cache option: %s", cache);
900         return 1;
901     }
902 
903     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
904     if (!blk) {
905         return 1;
906     }
907     bs = blk_bs(blk);
908 
909     qemu_progress_init(progress, 1.f);
910     qemu_progress_print(0.f, 100);
911 
912     if (base) {
913         base_bs = bdrv_find_backing_image(bs, base);
914         if (!base_bs) {
915             error_setg(&local_err, QERR_BASE_NOT_FOUND, base);
916             goto done;
917         }
918     } else {
919         /* This is different from QMP, which by default uses the deepest file in
920          * the backing chain (i.e., the very base); however, the traditional
921          * behavior of qemu-img commit is using the immediate backing file. */
922         base_bs = backing_bs(bs);
923         if (!base_bs) {
924             error_setg(&local_err, "Image does not have a backing file");
925             goto done;
926         }
927     }
928 
929     cbi = (CommonBlockJobCBInfo){
930         .errp = &local_err,
931         .bs   = bs,
932     };
933 
934     aio_context = bdrv_get_aio_context(bs);
935     aio_context_acquire(aio_context);
936     commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0,
937                         BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi,
938                         &local_err, false);
939     aio_context_release(aio_context);
940     if (local_err) {
941         goto done;
942     }
943 
944     /* When the block job completes, the BlockBackend reference will point to
945      * the old backing file. In order to avoid that the top image is already
946      * deleted, so we can still empty it afterwards, increment the reference
947      * counter here preemptively. */
948     if (!drop) {
949         bdrv_ref(bs);
950     }
951 
952     run_block_job(bs->job, &local_err);
953     if (local_err) {
954         goto unref_backing;
955     }
956 
957     if (!drop && bs->drv->bdrv_make_empty) {
958         ret = bs->drv->bdrv_make_empty(bs);
959         if (ret) {
960             error_setg_errno(&local_err, -ret, "Could not empty %s",
961                              filename);
962             goto unref_backing;
963         }
964     }
965 
966 unref_backing:
967     if (!drop) {
968         bdrv_unref(bs);
969     }
970 
971 done:
972     qemu_progress_end();
973 
974     blk_unref(blk);
975 
976     if (local_err) {
977         error_report_err(local_err);
978         return 1;
979     }
980 
981     qprintf(quiet, "Image committed.\n");
982     return 0;
983 }
984 
/*
 * Classifies the run of 512-byte sectors at the start of 'buf'.
 *
 * Returns 1 if the first sector contains at least one non-NUL byte and 0 if
 * it is entirely zero. If 'n' is not positive, 0 is returned and '*pnum' is
 * set to 0.
 *
 * 'pnum' is set to the number of consecutive sectors, starting with and
 * including the first one, that share the first sector's zero/non-zero
 * state. The value never exceeds 'n'.
 */
static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum)
{
    const uint8_t *cur;
    bool head_is_zero;
    int run;

    if (n <= 0) {
        *pnum = 0;
        return 0;
    }

    head_is_zero = buffer_is_zero(buf, 512);

    /* Extend the run for as long as the following sectors look the same */
    run = 1;
    cur = buf + 512;
    while (run < n && buffer_is_zero(cur, 512) == head_is_zero) {
        run++;
        cur += 512;
    }

    *pnum = run;
    return head_is_zero ? 0 : 1;
}
1011 
1012 /*
1013  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1014  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1015  * breaking up write requests for only small sparse areas.
1016  */
1017 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1018     int min)
1019 {
1020     int ret;
1021     int num_checked, num_used;
1022 
1023     if (n < min) {
1024         min = n;
1025     }
1026 
1027     ret = is_allocated_sectors(buf, n, pnum);
1028     if (!ret) {
1029         return ret;
1030     }
1031 
1032     num_used = *pnum;
1033     buf += BDRV_SECTOR_SIZE * *pnum;
1034     n -= *pnum;
1035     num_checked = num_used;
1036 
1037     while (n > 0) {
1038         ret = is_allocated_sectors(buf, n, pnum);
1039 
1040         buf += BDRV_SECTOR_SIZE * *pnum;
1041         n -= *pnum;
1042         num_checked += *pnum;
1043         if (ret) {
1044             num_used = num_checked;
1045         } else if (*pnum >= min) {
1046             break;
1047         }
1048     }
1049 
1050     *pnum = num_used;
1051     return 1;
1052 }
1053 
/*
 * Compares two buffers in units of 512-byte sectors.
 *
 * Returns 0 if the first sector of both buffers is identical and 1 if it
 * differs. If 'n' is not positive, 0 is returned and '*pnum' is set to 0.
 *
 * 'pnum' is set to the number of consecutive sectors, starting with and
 * including the first one, that have the same comparison result as the
 * first sector.
 */
static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n,
    int *pnum)
{
    bool first_differs;
    int count;

    if (n <= 0) {
        *pnum = 0;
        return 0;
    }

    first_differs = memcmp(buf1, buf2, 512) != 0;

    count = 1;
    while (count < n) {
        size_t off = (size_t)count * 512;
        bool differs = memcmp(buf1 + off, buf2 + off, 512) != 0;

        if (differs != first_differs) {
            break;
        }
        count++;
    }

    *pnum = count;
    return first_differs;
}
1085 
/*
 * Size in bytes (2 MiB) of the data buffers that img_compare() allocates;
 * sectors_to_process() caps each processing step at this many bytes.
 */
#define IO_BUF_SIZE (2 * 1024 * 1024)
1087 
1088 static int64_t sectors_to_bytes(int64_t sectors)
1089 {
1090     return sectors << BDRV_SECTOR_BITS;
1091 }
1092 
1093 static int64_t sectors_to_process(int64_t total, int64_t from)
1094 {
1095     return MIN(total - from, IO_BUF_SIZE >> BDRV_SECTOR_BITS);
1096 }
1097 
1098 /*
1099  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1100  *
1101  * Returns 0 in case sectors are filled with 0, 1 if sectors contain non-zero
1102  * data and negative value on error.
1103  *
1104  * @param blk:  BlockBackend for the image
1105  * @param sect_num: Number of first sector to check
1106  * @param sect_count: Number of sectors to check
1107  * @param filename: Name of disk file we are checking (logging purpose)
1108  * @param buffer: Allocated buffer for storing read data
1109  * @param quiet: Flag for quiet mode
1110  */
1111 static int check_empty_sectors(BlockBackend *blk, int64_t sect_num,
1112                                int sect_count, const char *filename,
1113                                uint8_t *buffer, bool quiet)
1114 {
1115     int pnum, ret = 0;
1116     ret = blk_pread(blk, sect_num << BDRV_SECTOR_BITS, buffer,
1117                     sect_count << BDRV_SECTOR_BITS);
1118     if (ret < 0) {
1119         error_report("Error while reading offset %" PRId64 " of %s: %s",
1120                      sectors_to_bytes(sect_num), filename, strerror(-ret));
1121         return ret;
1122     }
1123     ret = is_allocated_sectors(buffer, sect_count, &pnum);
1124     if (ret || pnum != sect_count) {
1125         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1126                 sectors_to_bytes(ret ? sect_num : sect_num + pnum));
1127         return 1;
1128     }
1129 
1130     return 0;
1131 }
1132 
1133 /*
1134  * Compares two images. Exit codes:
1135  *
1136  * 0 - Images are identical
1137  * 1 - Images differ
1138  * >1 - Error occurred
1139  */
1140 static int img_compare(int argc, char **argv)
1141 {
1142     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1143     BlockBackend *blk1, *blk2;
1144     BlockDriverState *bs1, *bs2;
1145     int64_t total_sectors1, total_sectors2;
1146     uint8_t *buf1 = NULL, *buf2 = NULL;
1147     int pnum1, pnum2;
1148     int allocated1, allocated2;
1149     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1150     bool progress = false, quiet = false, strict = false;
1151     int flags;
1152     bool writethrough;
1153     int64_t total_sectors;
1154     int64_t sector_num = 0;
1155     int64_t nb_sectors;
1156     int c, pnum;
1157     uint64_t progress_base;
1158     bool image_opts = false;
1159 
1160     cache = BDRV_DEFAULT_CACHE;
1161     for (;;) {
1162         static const struct option long_options[] = {
1163             {"help", no_argument, 0, 'h'},
1164             {"object", required_argument, 0, OPTION_OBJECT},
1165             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1166             {0, 0, 0, 0}
1167         };
1168         c = getopt_long(argc, argv, "hf:F:T:pqs",
1169                         long_options, NULL);
1170         if (c == -1) {
1171             break;
1172         }
1173         switch (c) {
1174         case '?':
1175         case 'h':
1176             help();
1177             break;
1178         case 'f':
1179             fmt1 = optarg;
1180             break;
1181         case 'F':
1182             fmt2 = optarg;
1183             break;
1184         case 'T':
1185             cache = optarg;
1186             break;
1187         case 'p':
1188             progress = true;
1189             break;
1190         case 'q':
1191             quiet = true;
1192             break;
1193         case 's':
1194             strict = true;
1195             break;
1196         case OPTION_OBJECT: {
1197             QemuOpts *opts;
1198             opts = qemu_opts_parse_noisily(&qemu_object_opts,
1199                                            optarg, true);
1200             if (!opts) {
1201                 ret = 2;
1202                 goto out4;
1203             }
1204         }   break;
1205         case OPTION_IMAGE_OPTS:
1206             image_opts = true;
1207             break;
1208         }
1209     }
1210 
1211     /* Progress is not shown in Quiet mode */
1212     if (quiet) {
1213         progress = false;
1214     }
1215 
1216 
1217     if (optind != argc - 2) {
1218         error_exit("Expecting two image file names");
1219     }
1220     filename1 = argv[optind++];
1221     filename2 = argv[optind++];
1222 
1223     if (qemu_opts_foreach(&qemu_object_opts,
1224                           user_creatable_add_opts_foreach,
1225                           NULL, NULL)) {
1226         ret = 2;
1227         goto out4;
1228     }
1229 
1230     /* Initialize before goto out */
1231     qemu_progress_init(progress, 2.0);
1232 
1233     flags = 0;
1234     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1235     if (ret < 0) {
1236         error_report("Invalid source cache option: %s", cache);
1237         ret = 2;
1238         goto out3;
1239     }
1240 
1241     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet);
1242     if (!blk1) {
1243         ret = 2;
1244         goto out3;
1245     }
1246 
1247     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet);
1248     if (!blk2) {
1249         ret = 2;
1250         goto out2;
1251     }
1252     bs1 = blk_bs(blk1);
1253     bs2 = blk_bs(blk2);
1254 
1255     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1256     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1257     total_sectors1 = blk_nb_sectors(blk1);
1258     if (total_sectors1 < 0) {
1259         error_report("Can't get size of %s: %s",
1260                      filename1, strerror(-total_sectors1));
1261         ret = 4;
1262         goto out;
1263     }
1264     total_sectors2 = blk_nb_sectors(blk2);
1265     if (total_sectors2 < 0) {
1266         error_report("Can't get size of %s: %s",
1267                      filename2, strerror(-total_sectors2));
1268         ret = 4;
1269         goto out;
1270     }
1271     total_sectors = MIN(total_sectors1, total_sectors2);
1272     progress_base = MAX(total_sectors1, total_sectors2);
1273 
1274     qemu_progress_print(0, 100);
1275 
1276     if (strict && total_sectors1 != total_sectors2) {
1277         ret = 1;
1278         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1279         goto out;
1280     }
1281 
1282     for (;;) {
1283         int64_t status1, status2;
1284         BlockDriverState *file;
1285 
1286         nb_sectors = sectors_to_process(total_sectors, sector_num);
1287         if (nb_sectors <= 0) {
1288             break;
1289         }
1290         status1 = bdrv_get_block_status_above(bs1, NULL, sector_num,
1291                                               total_sectors1 - sector_num,
1292                                               &pnum1, &file);
1293         if (status1 < 0) {
1294             ret = 3;
1295             error_report("Sector allocation test failed for %s", filename1);
1296             goto out;
1297         }
1298         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1299 
1300         status2 = bdrv_get_block_status_above(bs2, NULL, sector_num,
1301                                               total_sectors2 - sector_num,
1302                                               &pnum2, &file);
1303         if (status2 < 0) {
1304             ret = 3;
1305             error_report("Sector allocation test failed for %s", filename2);
1306             goto out;
1307         }
1308         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1309         if (pnum1) {
1310             nb_sectors = MIN(nb_sectors, pnum1);
1311         }
1312         if (pnum2) {
1313             nb_sectors = MIN(nb_sectors, pnum2);
1314         }
1315 
1316         if (strict) {
1317             if ((status1 & ~BDRV_BLOCK_OFFSET_MASK) !=
1318                 (status2 & ~BDRV_BLOCK_OFFSET_MASK)) {
1319                 ret = 1;
1320                 qprintf(quiet, "Strict mode: Offset %" PRId64
1321                         " block status mismatch!\n",
1322                         sectors_to_bytes(sector_num));
1323                 goto out;
1324             }
1325         }
1326         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1327             nb_sectors = MIN(pnum1, pnum2);
1328         } else if (allocated1 == allocated2) {
1329             if (allocated1) {
1330                 ret = blk_pread(blk1, sector_num << BDRV_SECTOR_BITS, buf1,
1331                                 nb_sectors << BDRV_SECTOR_BITS);
1332                 if (ret < 0) {
1333                     error_report("Error while reading offset %" PRId64 " of %s:"
1334                                  " %s", sectors_to_bytes(sector_num), filename1,
1335                                  strerror(-ret));
1336                     ret = 4;
1337                     goto out;
1338                 }
1339                 ret = blk_pread(blk2, sector_num << BDRV_SECTOR_BITS, buf2,
1340                                 nb_sectors << BDRV_SECTOR_BITS);
1341                 if (ret < 0) {
1342                     error_report("Error while reading offset %" PRId64
1343                                  " of %s: %s", sectors_to_bytes(sector_num),
1344                                  filename2, strerror(-ret));
1345                     ret = 4;
1346                     goto out;
1347                 }
1348                 ret = compare_sectors(buf1, buf2, nb_sectors, &pnum);
1349                 if (ret || pnum != nb_sectors) {
1350                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1351                             sectors_to_bytes(
1352                                 ret ? sector_num : sector_num + pnum));
1353                     ret = 1;
1354                     goto out;
1355                 }
1356             }
1357         } else {
1358 
1359             if (allocated1) {
1360                 ret = check_empty_sectors(blk1, sector_num, nb_sectors,
1361                                           filename1, buf1, quiet);
1362             } else {
1363                 ret = check_empty_sectors(blk2, sector_num, nb_sectors,
1364                                           filename2, buf1, quiet);
1365             }
1366             if (ret) {
1367                 if (ret < 0) {
1368                     error_report("Error while reading offset %" PRId64 ": %s",
1369                                  sectors_to_bytes(sector_num), strerror(-ret));
1370                     ret = 4;
1371                 }
1372                 goto out;
1373             }
1374         }
1375         sector_num += nb_sectors;
1376         qemu_progress_print(((float) nb_sectors / progress_base)*100, 100);
1377     }
1378 
1379     if (total_sectors1 != total_sectors2) {
1380         BlockBackend *blk_over;
1381         int64_t total_sectors_over;
1382         const char *filename_over;
1383 
1384         qprintf(quiet, "Warning: Image size mismatch!\n");
1385         if (total_sectors1 > total_sectors2) {
1386             total_sectors_over = total_sectors1;
1387             blk_over = blk1;
1388             filename_over = filename1;
1389         } else {
1390             total_sectors_over = total_sectors2;
1391             blk_over = blk2;
1392             filename_over = filename2;
1393         }
1394 
1395         for (;;) {
1396             nb_sectors = sectors_to_process(total_sectors_over, sector_num);
1397             if (nb_sectors <= 0) {
1398                 break;
1399             }
1400             ret = bdrv_is_allocated_above(blk_bs(blk_over), NULL, sector_num,
1401                                           nb_sectors, &pnum);
1402             if (ret < 0) {
1403                 ret = 3;
1404                 error_report("Sector allocation test failed for %s",
1405                              filename_over);
1406                 goto out;
1407 
1408             }
1409             nb_sectors = pnum;
1410             if (ret) {
1411                 ret = check_empty_sectors(blk_over, sector_num, nb_sectors,
1412                                           filename_over, buf1, quiet);
1413                 if (ret) {
1414                     if (ret < 0) {
1415                         error_report("Error while reading offset %" PRId64
1416                                      " of %s: %s", sectors_to_bytes(sector_num),
1417                                      filename_over, strerror(-ret));
1418                         ret = 4;
1419                     }
1420                     goto out;
1421                 }
1422             }
1423             sector_num += nb_sectors;
1424             qemu_progress_print(((float) nb_sectors / progress_base)*100, 100);
1425         }
1426     }
1427 
1428     qprintf(quiet, "Images are identical.\n");
1429     ret = 0;
1430 
1431 out:
1432     qemu_vfree(buf1);
1433     qemu_vfree(buf2);
1434     blk_unref(blk2);
1435 out2:
1436     blk_unref(blk1);
1437 out3:
1438     qemu_progress_end();
1439 out4:
1440     return ret;
1441 }
1442 
/*
 * How the sector range currently being converted has to be handled; set by
 * convert_iteration_sectors() and acted upon by convert_write().
 */
enum ImgConvertBlockStatus {
    /* Must be read from the source and written to the target */
    BLK_DATA,
    /* The block layer reported the range as zero (BDRV_BLOCK_ZERO) */
    BLK_ZERO,
    /* Neither data nor zero in the source while the target has a backing
     * file: the range is left unallocated in the target */
    BLK_BACKING_FILE,
};
1448 
/* State shared by the convert_*() helpers while copying to the target. */
typedef struct ImgConvertState {
    /* Source images; they are treated as one concatenated sector range */
    BlockBackend **src;
    /* Length in sectors of each entry of 'src' */
    int64_t *src_sectors;
    /* Index into 'src' of the current part, and number of entries in 'src' */
    int src_cur, src_num;
    /* Sector number, within the concatenated range, where src[src_cur]
     * starts */
    int64_t src_cur_offset;
    /* Total number of sectors to convert */
    int64_t total_sectors;
    /* Number of sectors that will be written; computed by convert_do_copy()
     * and used for progress reporting */
    int64_t allocated_sectors;
    /* Status of the range returned by the last convert_iteration_sectors() */
    enum ImgConvertBlockStatus status;
    /* First sector for which 'status' is no longer known to be valid */
    int64_t sector_next_status;
    /* Image that is written to */
    BlockBackend *target;
    /* Set by convert_do_copy(); if true, zero ranges are skipped instead of
     * being written */
    bool has_zero_init;
    /* Write data with blk_pwrite_compressed(), in whole clusters */
    bool compressed;
    /* If true, ranges unallocated in the source stay unallocated in the
     * target */
    bool target_has_backing;
    /* Minimum zero run (in sectors) that is kept sparse; 0 means everything
     * is written */
    int min_sparse;
    /* Cluster size in sectors; only consulted when 'compressed' is set */
    size_t cluster_sectors;
    /* Size in sectors of the copy buffer */
    size_t buf_sectors;
} ImgConvertState;
1466 
1467 static void convert_select_part(ImgConvertState *s, int64_t sector_num)
1468 {
1469     assert(sector_num >= s->src_cur_offset);
1470     while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) {
1471         s->src_cur_offset += s->src_sectors[s->src_cur];
1472         s->src_cur++;
1473         assert(s->src_cur < s->src_num);
1474     }
1475 }
1476 
/*
 * Determines how many sectors starting at 'sector_num' can be handled in one
 * step and stores in s->status how they have to be handled.
 *
 * The block status is only queried again once 'sector_num' has reached
 * s->sector_next_status; until then the cached s->status is reused.
 *
 * Returns the number of sectors, or the negative value returned by the block
 * status query on failure.
 */
static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
{
    int64_t ret;
    int n;

    convert_select_part(s, sector_num);

    assert(s->total_sectors > sector_num);
    n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);

    if (s->sector_next_status <= sector_num) {
        BlockDriverState *file;
        /* Query the current source image only, at its own sector offset */
        ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]),
                                    sector_num - s->src_cur_offset,
                                    n, &n, &file);
        if (ret < 0) {
            return ret;
        }

        if (ret & BDRV_BLOCK_ZERO) {
            s->status = BLK_ZERO;
        } else if (ret & BDRV_BLOCK_DATA) {
            s->status = BLK_DATA;
        } else if (!s->target_has_backing) {
            /* Without a target backing file we must copy over the contents of
             * the backing file as well. */
            /* Check block status of the backing file chain to avoid
             * needlessly reading zeroes and limiting the iteration to the
             * buffer size */
            ret = bdrv_get_block_status_above(blk_bs(s->src[s->src_cur]), NULL,
                                              sector_num - s->src_cur_offset,
                                              n, &n, &file);
            if (ret < 0) {
                return ret;
            }

            if (ret & BDRV_BLOCK_ZERO) {
                s->status = BLK_ZERO;
            } else {
                s->status = BLK_DATA;
            }
        } else {
            s->status = BLK_BACKING_FILE;
        }

        /* Remember up to where the status determined above is valid */
        s->sector_next_status = sector_num + n;
    }

    n = MIN(n, s->sector_next_status - sector_num);
    if (s->status == BLK_DATA) {
        /* Data has to pass through the copy buffer */
        n = MIN(n, s->buf_sectors);
    }

    /* We need to write complete clusters for compressed images, so if an
     * unallocated area is shorter than that, we must consider the whole
     * cluster allocated. */
    if (s->compressed) {
        if (n < s->cluster_sectors) {
            n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
            s->status = BLK_DATA;
        } else {
            n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
        }
    }

    return n;
}
1544 
1545 static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
1546                         uint8_t *buf)
1547 {
1548     int n;
1549     int ret;
1550 
1551     assert(nb_sectors <= s->buf_sectors);
1552     while (nb_sectors > 0) {
1553         BlockBackend *blk;
1554         int64_t bs_sectors;
1555 
1556         /* In the case of compression with multiple source files, we can get a
1557          * nb_sectors that spreads into the next part. So we must be able to
1558          * read across multiple BDSes for one convert_read() call. */
1559         convert_select_part(s, sector_num);
1560         blk = s->src[s->src_cur];
1561         bs_sectors = s->src_sectors[s->src_cur];
1562 
1563         n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset));
1564         ret = blk_pread(blk,
1565                         (sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS,
1566                         buf, n << BDRV_SECTOR_BITS);
1567         if (ret < 0) {
1568             return ret;
1569         }
1570 
1571         sector_num += n;
1572         nb_sectors -= n;
1573         buf += n * BDRV_SECTOR_SIZE;
1574     }
1575 
1576     return 0;
1577 }
1578 
/*
 * Transfers 'nb_sectors' sectors starting at target sector 'sector_num' to
 * s->target, handling them as s->status says. 'buf' holds the data for
 * BLK_DATA ranges.
 *
 * Returns 0 on success or the negative value returned by the failing write.
 */
static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
                         const uint8_t *buf)
{
    int ret;

    while (nb_sectors > 0) {
        /* Sectors handled in this round; the non-compressed BLK_DATA case
         * may shorten it */
        int n = nb_sectors;

        switch (s->status) {
        case BLK_BACKING_FILE:
            /* If we have a backing file, leave clusters unallocated that are
             * unallocated in the source image, so that the backing file is
             * visible at the respective offset. */
            assert(s->target_has_backing);
            break;

        case BLK_DATA:
            /* We must always write compressed clusters as a whole, so don't
             * try to find zeroed parts in the buffer. We can only save the
             * write if the buffer is completely zeroed and we're allowed to
             * keep the target sparse. */
            if (s->compressed) {
                if (s->has_zero_init && s->min_sparse &&
                    buffer_is_zero(buf, n * BDRV_SECTOR_SIZE))
                {
                    assert(!s->target_has_backing);
                    break;
                }

                ret = blk_pwrite_compressed(s->target,
                                            sector_num << BDRV_SECTOR_BITS,
                                            buf, n << BDRV_SECTOR_BITS);
                if (ret < 0) {
                    return ret;
                }
                break;
            }

            /* If there is real non-zero data or we're told to keep the target
             * fully allocated (-S 0), we must write it. Otherwise we can treat
             * it as zero sectors. */
            /* is_allocated_sectors_min() also shrinks n to the length of the
             * leading used (or zero) run, so each round of the loop handles
             * one run. */
            if (!s->min_sparse ||
                is_allocated_sectors_min(buf, n, &n, s->min_sparse))
            {
                ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
                                 buf, n << BDRV_SECTOR_BITS, 0);
                if (ret < 0) {
                    return ret;
                }
                break;
            }
            /* fall-through */

        case BLK_ZERO:
            /* Nothing to write if the target already reads as zeroes */
            if (s->has_zero_init) {
                break;
            }
            ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS,
                                    n << BDRV_SECTOR_BITS, 0);
            if (ret < 0) {
                return ret;
            }
            break;
        }

        sector_num += n;
        nb_sectors -= n;
        buf += n * BDRV_SECTOR_SIZE;
    }

    return 0;
}
1651 
1652 static int convert_do_copy(ImgConvertState *s)
1653 {
1654     uint8_t *buf = NULL;
1655     int64_t sector_num, allocated_done;
1656     int ret;
1657     int n;
1658 
1659     /* Check whether we have zero initialisation or can get it efficiently */
1660     s->has_zero_init = s->min_sparse && !s->target_has_backing
1661                      ? bdrv_has_zero_init(blk_bs(s->target))
1662                      : false;
1663 
1664     if (!s->has_zero_init && !s->target_has_backing &&
1665         bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
1666     {
1667         ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP);
1668         if (ret == 0) {
1669             s->has_zero_init = true;
1670         }
1671     }
1672 
1673     /* Allocate buffer for copied data. For compressed images, only one cluster
1674      * can be copied at a time. */
1675     if (s->compressed) {
1676         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
1677             error_report("invalid cluster size");
1678             ret = -EINVAL;
1679             goto fail;
1680         }
1681         s->buf_sectors = s->cluster_sectors;
1682     }
1683     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1684 
1685     /* Calculate allocated sectors for progress */
1686     s->allocated_sectors = 0;
1687     sector_num = 0;
1688     while (sector_num < s->total_sectors) {
1689         n = convert_iteration_sectors(s, sector_num);
1690         if (n < 0) {
1691             ret = n;
1692             goto fail;
1693         }
1694         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
1695         {
1696             s->allocated_sectors += n;
1697         }
1698         sector_num += n;
1699     }
1700 
1701     /* Do the copy */
1702     s->src_cur = 0;
1703     s->src_cur_offset = 0;
1704     s->sector_next_status = 0;
1705 
1706     sector_num = 0;
1707     allocated_done = 0;
1708 
1709     while (sector_num < s->total_sectors) {
1710         n = convert_iteration_sectors(s, sector_num);
1711         if (n < 0) {
1712             ret = n;
1713             goto fail;
1714         }
1715         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
1716         {
1717             allocated_done += n;
1718             qemu_progress_print(100.0 * allocated_done / s->allocated_sectors,
1719                                 0);
1720         }
1721 
1722         if (s->status == BLK_DATA) {
1723             ret = convert_read(s, sector_num, n, buf);
1724             if (ret < 0) {
1725                 error_report("error while reading sector %" PRId64
1726                              ": %s", sector_num, strerror(-ret));
1727                 goto fail;
1728             }
1729         } else if (!s->min_sparse && s->status == BLK_ZERO) {
1730             n = MIN(n, s->buf_sectors);
1731             memset(buf, 0, n * BDRV_SECTOR_SIZE);
1732             s->status = BLK_DATA;
1733         }
1734 
1735         ret = convert_write(s, sector_num, n, buf);
1736         if (ret < 0) {
1737             error_report("error while writing sector %" PRId64
1738                          ": %s", sector_num, strerror(-ret));
1739             goto fail;
1740         }
1741 
1742         sector_num += n;
1743     }
1744 
1745     if (s->compressed) {
1746         /* signal EOF to align */
1747         ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
1748         if (ret < 0) {
1749             goto fail;
1750         }
1751     }
1752 
1753     ret = 0;
1754 fail:
1755     qemu_vfree(buf);
1756     return ret;
1757 }
1758 
1759 static int img_convert(int argc, char **argv)
1760 {
1761     int c, bs_n, bs_i, compress, cluster_sectors, skip_create;
1762     int64_t ret = 0;
1763     int progress = 0, flags, src_flags;
1764     bool writethrough, src_writethrough;
1765     const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, *out_filename;
1766     BlockDriver *drv, *proto_drv;
1767     BlockBackend **blk = NULL, *out_blk = NULL;
1768     BlockDriverState **bs = NULL, *out_bs = NULL;
1769     int64_t total_sectors;
1770     int64_t *bs_sectors = NULL;
1771     size_t bufsectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE;
1772     BlockDriverInfo bdi;
1773     QemuOpts *opts = NULL;
1774     QemuOptsList *create_opts = NULL;
1775     const char *out_baseimg_param;
1776     char *options = NULL;
1777     const char *snapshot_name = NULL;
1778     int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */
1779     bool quiet = false;
1780     Error *local_err = NULL;
1781     QemuOpts *sn_opts = NULL;
1782     ImgConvertState state;
1783     bool image_opts = false;
1784 
1785     fmt = NULL;
1786     out_fmt = "raw";
1787     cache = "unsafe";
1788     src_cache = BDRV_DEFAULT_CACHE;
1789     out_baseimg = NULL;
1790     compress = 0;
1791     skip_create = 0;
1792     for(;;) {
1793         static const struct option long_options[] = {
1794             {"help", no_argument, 0, 'h'},
1795             {"object", required_argument, 0, OPTION_OBJECT},
1796             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1797             {0, 0, 0, 0}
1798         };
1799         c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn",
1800                         long_options, NULL);
1801         if (c == -1) {
1802             break;
1803         }
1804         switch(c) {
1805         case '?':
1806         case 'h':
1807             help();
1808             break;
1809         case 'f':
1810             fmt = optarg;
1811             break;
1812         case 'O':
1813             out_fmt = optarg;
1814             break;
1815         case 'B':
1816             out_baseimg = optarg;
1817             break;
1818         case 'c':
1819             compress = 1;
1820             break;
1821         case 'e':
1822             error_report("option -e is deprecated, please use \'-o "
1823                   "encryption\' instead!");
1824             ret = -1;
1825             goto fail_getopt;
1826         case '6':
1827             error_report("option -6 is deprecated, please use \'-o "
1828                   "compat6\' instead!");
1829             ret = -1;
1830             goto fail_getopt;
1831         case 'o':
1832             if (!is_valid_option_list(optarg)) {
1833                 error_report("Invalid option list: %s", optarg);
1834                 ret = -1;
1835                 goto fail_getopt;
1836             }
1837             if (!options) {
1838                 options = g_strdup(optarg);
1839             } else {
1840                 char *old_options = options;
1841                 options = g_strdup_printf("%s,%s", options, optarg);
1842                 g_free(old_options);
1843             }
1844             break;
1845         case 's':
1846             snapshot_name = optarg;
1847             break;
1848         case 'l':
1849             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
1850                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
1851                                                   optarg, false);
1852                 if (!sn_opts) {
1853                     error_report("Failed in parsing snapshot param '%s'",
1854                                  optarg);
1855                     ret = -1;
1856                     goto fail_getopt;
1857                 }
1858             } else {
1859                 snapshot_name = optarg;
1860             }
1861             break;
1862         case 'S':
1863         {
1864             int64_t sval;
1865             char *end;
1866             sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
1867             if (sval < 0 || *end) {
1868                 error_report("Invalid minimum zero buffer size for sparse output specified");
1869                 ret = -1;
1870                 goto fail_getopt;
1871             }
1872 
1873             min_sparse = sval / BDRV_SECTOR_SIZE;
1874             break;
1875         }
1876         case 'p':
1877             progress = 1;
1878             break;
1879         case 't':
1880             cache = optarg;
1881             break;
1882         case 'T':
1883             src_cache = optarg;
1884             break;
1885         case 'q':
1886             quiet = true;
1887             break;
1888         case 'n':
1889             skip_create = 1;
1890             break;
1891         case OPTION_OBJECT:
1892             opts = qemu_opts_parse_noisily(&qemu_object_opts,
1893                                            optarg, true);
1894             if (!opts) {
1895                 goto fail_getopt;
1896             }
1897             break;
1898         case OPTION_IMAGE_OPTS:
1899             image_opts = true;
1900             break;
1901         }
1902     }
1903 
1904     if (qemu_opts_foreach(&qemu_object_opts,
1905                           user_creatable_add_opts_foreach,
1906                           NULL, NULL)) {
1907         goto fail_getopt;
1908     }
1909 
1910     /* Initialize before goto out */
1911     if (quiet) {
1912         progress = 0;
1913     }
1914     qemu_progress_init(progress, 1.0);
1915 
1916     bs_n = argc - optind - 1;
1917     out_filename = bs_n >= 1 ? argv[argc - 1] : NULL;
1918 
1919     if (options && has_help_option(options)) {
1920         ret = print_block_option_help(out_filename, out_fmt);
1921         goto out;
1922     }
1923 
1924     if (bs_n < 1) {
1925         error_exit("Must specify image file name");
1926     }
1927 
1928 
1929     if (bs_n > 1 && out_baseimg) {
1930         error_report("-B makes no sense when concatenating multiple input "
1931                      "images");
1932         ret = -1;
1933         goto out;
1934     }
1935 
1936     src_flags = 0;
1937     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
1938     if (ret < 0) {
1939         error_report("Invalid source cache option: %s", src_cache);
1940         goto out;
1941     }
1942 
1943     qemu_progress_print(0, 100);
1944 
1945     blk = g_new0(BlockBackend *, bs_n);
1946     bs = g_new0(BlockDriverState *, bs_n);
1947     bs_sectors = g_new(int64_t, bs_n);
1948 
1949     total_sectors = 0;
1950     for (bs_i = 0; bs_i < bs_n; bs_i++) {
1951         blk[bs_i] = img_open(image_opts, argv[optind + bs_i],
1952                              fmt, src_flags, src_writethrough, quiet);
1953         if (!blk[bs_i]) {
1954             ret = -1;
1955             goto out;
1956         }
1957         bs[bs_i] = blk_bs(blk[bs_i]);
1958         bs_sectors[bs_i] = blk_nb_sectors(blk[bs_i]);
1959         if (bs_sectors[bs_i] < 0) {
1960             error_report("Could not get size of %s: %s",
1961                          argv[optind + bs_i], strerror(-bs_sectors[bs_i]));
1962             ret = -1;
1963             goto out;
1964         }
1965         total_sectors += bs_sectors[bs_i];
1966     }
1967 
1968     if (sn_opts) {
1969         ret = bdrv_snapshot_load_tmp(bs[0],
1970                                      qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
1971                                      qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
1972                                      &local_err);
1973     } else if (snapshot_name != NULL) {
1974         if (bs_n > 1) {
1975             error_report("No support for concatenating multiple snapshot");
1976             ret = -1;
1977             goto out;
1978         }
1979 
1980         bdrv_snapshot_load_tmp_by_id_or_name(bs[0], snapshot_name, &local_err);
1981     }
1982     if (local_err) {
1983         error_reportf_err(local_err, "Failed to load snapshot: ");
1984         ret = -1;
1985         goto out;
1986     }
1987 
1988     /* Find driver and parse its options */
1989     drv = bdrv_find_format(out_fmt);
1990     if (!drv) {
1991         error_report("Unknown file format '%s'", out_fmt);
1992         ret = -1;
1993         goto out;
1994     }
1995 
1996     proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
1997     if (!proto_drv) {
1998         error_report_err(local_err);
1999         ret = -1;
2000         goto out;
2001     }
2002 
2003     if (!skip_create) {
2004         if (!drv->create_opts) {
2005             error_report("Format driver '%s' does not support image creation",
2006                          drv->format_name);
2007             ret = -1;
2008             goto out;
2009         }
2010 
2011         if (!proto_drv->create_opts) {
2012             error_report("Protocol driver '%s' does not support image creation",
2013                          proto_drv->format_name);
2014             ret = -1;
2015             goto out;
2016         }
2017 
2018         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2019         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2020 
2021         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2022         if (options) {
2023             qemu_opts_do_parse(opts, options, NULL, &local_err);
2024             if (local_err) {
2025                 error_report_err(local_err);
2026                 ret = -1;
2027                 goto out;
2028             }
2029         }
2030 
2031         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_sectors * 512,
2032                             &error_abort);
2033         ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
2034         if (ret < 0) {
2035             goto out;
2036         }
2037     }
2038 
2039     /* Get backing file name if -o backing_file was used */
2040     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2041     if (out_baseimg_param) {
2042         out_baseimg = out_baseimg_param;
2043     }
2044 
2045     /* Check if compression is supported */
2046     if (compress) {
2047         bool encryption =
2048             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2049         const char *preallocation =
2050             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2051 
2052         if (!drv->bdrv_co_pwritev_compressed) {
2053             error_report("Compression not supported for this file format");
2054             ret = -1;
2055             goto out;
2056         }
2057 
2058         if (encryption) {
2059             error_report("Compression and encryption not supported at "
2060                          "the same time");
2061             ret = -1;
2062             goto out;
2063         }
2064 
2065         if (preallocation
2066             && strcmp(preallocation, "off"))
2067         {
2068             error_report("Compression and preallocation not supported at "
2069                          "the same time");
2070             ret = -1;
2071             goto out;
2072         }
2073     }
2074 
2075     if (!skip_create) {
2076         /* Create the new image */
2077         ret = bdrv_create(drv, out_filename, opts, &local_err);
2078         if (ret < 0) {
2079             error_reportf_err(local_err, "%s: error while converting %s: ",
2080                               out_filename, out_fmt);
2081             goto out;
2082         }
2083     }
2084 
2085     flags = min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2086     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2087     if (ret < 0) {
2088         error_report("Invalid cache option: %s", cache);
2089         goto out;
2090     }
2091 
2092     /* XXX we should allow --image-opts to trigger use of
2093      * img_open() here, but then we have trouble with
2094      * the bdrv_create() call which takes different params.
2095      * Not critical right now, so fix can wait...
2096      */
2097     out_blk = img_open_file(out_filename, out_fmt, flags, writethrough, quiet);
2098     if (!out_blk) {
2099         ret = -1;
2100         goto out;
2101     }
2102     out_bs = blk_bs(out_blk);
2103 
2104     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2105      * or discard_alignment of the out_bs is greater. Limit to 32768 (16MB)
2106      * as maximum. */
2107     bufsectors = MIN(32768,
2108                      MAX(bufsectors,
2109                          MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2110                              out_bs->bl.pdiscard_alignment >>
2111                              BDRV_SECTOR_BITS)));
2112 
2113     if (skip_create) {
2114         int64_t output_sectors = blk_nb_sectors(out_blk);
2115         if (output_sectors < 0) {
2116             error_report("unable to get output image length: %s",
2117                          strerror(-output_sectors));
2118             ret = -1;
2119             goto out;
2120         } else if (output_sectors < total_sectors) {
2121             error_report("output file is smaller than input file");
2122             ret = -1;
2123             goto out;
2124         }
2125     }
2126 
2127     cluster_sectors = 0;
2128     ret = bdrv_get_info(out_bs, &bdi);
2129     if (ret < 0) {
2130         if (compress) {
2131             error_report("could not get block driver info");
2132             goto out;
2133         }
2134     } else {
2135         compress = compress || bdi.needs_compressed_writes;
2136         cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2137     }
2138 
2139     state = (ImgConvertState) {
2140         .src                = blk,
2141         .src_sectors        = bs_sectors,
2142         .src_num            = bs_n,
2143         .total_sectors      = total_sectors,
2144         .target             = out_blk,
2145         .compressed         = compress,
2146         .target_has_backing = (bool) out_baseimg,
2147         .min_sparse         = min_sparse,
2148         .cluster_sectors    = cluster_sectors,
2149         .buf_sectors        = bufsectors,
2150     };
2151     ret = convert_do_copy(&state);
2152 
2153 out:
2154     if (!ret) {
2155         qemu_progress_print(100, 0);
2156     }
2157     qemu_progress_end();
2158     qemu_opts_del(opts);
2159     qemu_opts_free(create_opts);
2160     qemu_opts_del(sn_opts);
2161     blk_unref(out_blk);
2162     g_free(bs);
2163     if (blk) {
2164         for (bs_i = 0; bs_i < bs_n; bs_i++) {
2165             blk_unref(blk[bs_i]);
2166         }
2167         g_free(blk);
2168     }
2169     g_free(bs_sectors);
2170 fail_getopt:
2171     g_free(options);
2172 
2173     if (ret) {
2174         return 1;
2175     }
2176     return 0;
2177 }
2178 
2179 
2180 static void dump_snapshots(BlockDriverState *bs)
2181 {
2182     QEMUSnapshotInfo *sn_tab, *sn;
2183     int nb_sns, i;
2184 
2185     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2186     if (nb_sns <= 0)
2187         return;
2188     printf("Snapshot list:\n");
2189     bdrv_snapshot_dump(fprintf, stdout, NULL);
2190     printf("\n");
2191     for(i = 0; i < nb_sns; i++) {
2192         sn = &sn_tab[i];
2193         bdrv_snapshot_dump(fprintf, stdout, sn);
2194         printf("\n");
2195     }
2196     g_free(sn_tab);
2197 }
2198 
2199 static void dump_json_image_info_list(ImageInfoList *list)
2200 {
2201     QString *str;
2202     QObject *obj;
2203     Visitor *v = qobject_output_visitor_new(&obj);
2204 
2205     visit_type_ImageInfoList(v, NULL, &list, &error_abort);
2206     visit_complete(v, &obj);
2207     str = qobject_to_json_pretty(obj);
2208     assert(str != NULL);
2209     printf("%s\n", qstring_get_str(str));
2210     qobject_decref(obj);
2211     visit_free(v);
2212     QDECREF(str);
2213 }
2214 
2215 static void dump_json_image_info(ImageInfo *info)
2216 {
2217     QString *str;
2218     QObject *obj;
2219     Visitor *v = qobject_output_visitor_new(&obj);
2220 
2221     visit_type_ImageInfo(v, NULL, &info, &error_abort);
2222     visit_complete(v, &obj);
2223     str = qobject_to_json_pretty(obj);
2224     assert(str != NULL);
2225     printf("%s\n", qstring_get_str(str));
2226     qobject_decref(obj);
2227     visit_free(v);
2228     QDECREF(str);
2229 }
2230 
2231 static void dump_human_image_info_list(ImageInfoList *list)
2232 {
2233     ImageInfoList *elem;
2234     bool delim = false;
2235 
2236     for (elem = list; elem; elem = elem->next) {
2237         if (delim) {
2238             printf("\n");
2239         }
2240         delim = true;
2241 
2242         bdrv_image_info_dump(fprintf, stdout, elem->value);
2243     }
2244 }
2245 
2246 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2247 {
2248     return strcmp(a, b) == 0;
2249 }
2250 
2251 /**
2252  * Open an image file chain and return an ImageInfoList
2253  *
2254  * @filename: topmost image filename
2255  * @fmt: topmost image format (may be NULL to autodetect)
2256  * @chain: true  - enumerate entire backing file chain
2257  *         false - only topmost image file
2258  *
2259  * Returns a list of ImageInfo objects or NULL if there was an error opening an
2260  * image file.  If there was an error a message will have been printed to
2261  * stderr.
2262  */
2263 static ImageInfoList *collect_image_info_list(bool image_opts,
2264                                               const char *filename,
2265                                               const char *fmt,
2266                                               bool chain)
2267 {
2268     ImageInfoList *head = NULL;
2269     ImageInfoList **last = &head;
2270     GHashTable *filenames;
2271     Error *err = NULL;
2272 
2273     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2274 
2275     while (filename) {
2276         BlockBackend *blk;
2277         BlockDriverState *bs;
2278         ImageInfo *info;
2279         ImageInfoList *elem;
2280 
2281         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2282             error_report("Backing file '%s' creates an infinite loop.",
2283                          filename);
2284             goto err;
2285         }
2286         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2287 
2288         blk = img_open(image_opts, filename, fmt,
2289                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false);
2290         if (!blk) {
2291             goto err;
2292         }
2293         bs = blk_bs(blk);
2294 
2295         bdrv_query_image_info(bs, &info, &err);
2296         if (err) {
2297             error_report_err(err);
2298             blk_unref(blk);
2299             goto err;
2300         }
2301 
2302         elem = g_new0(ImageInfoList, 1);
2303         elem->value = info;
2304         *last = elem;
2305         last = &elem->next;
2306 
2307         blk_unref(blk);
2308 
2309         filename = fmt = NULL;
2310         if (chain) {
2311             if (info->has_full_backing_filename) {
2312                 filename = info->full_backing_filename;
2313             } else if (info->has_backing_filename) {
2314                 error_report("Could not determine absolute backing filename,"
2315                              " but backing filename '%s' present",
2316                              info->backing_filename);
2317                 goto err;
2318             }
2319             if (info->has_backing_filename_format) {
2320                 fmt = info->backing_filename_format;
2321             }
2322         }
2323     }
2324     g_hash_table_destroy(filenames);
2325     return head;
2326 
2327 err:
2328     qapi_free_ImageInfoList(head);
2329     g_hash_table_destroy(filenames);
2330     return NULL;
2331 }
2332 
2333 static int img_info(int argc, char **argv)
2334 {
2335     int c;
2336     OutputFormat output_format = OFORMAT_HUMAN;
2337     bool chain = false;
2338     const char *filename, *fmt, *output;
2339     ImageInfoList *list;
2340     bool image_opts = false;
2341 
2342     fmt = NULL;
2343     output = NULL;
2344     for(;;) {
2345         int option_index = 0;
2346         static const struct option long_options[] = {
2347             {"help", no_argument, 0, 'h'},
2348             {"format", required_argument, 0, 'f'},
2349             {"output", required_argument, 0, OPTION_OUTPUT},
2350             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
2351             {"object", required_argument, 0, OPTION_OBJECT},
2352             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2353             {0, 0, 0, 0}
2354         };
2355         c = getopt_long(argc, argv, "f:h",
2356                         long_options, &option_index);
2357         if (c == -1) {
2358             break;
2359         }
2360         switch(c) {
2361         case '?':
2362         case 'h':
2363             help();
2364             break;
2365         case 'f':
2366             fmt = optarg;
2367             break;
2368         case OPTION_OUTPUT:
2369             output = optarg;
2370             break;
2371         case OPTION_BACKING_CHAIN:
2372             chain = true;
2373             break;
2374         case OPTION_OBJECT: {
2375             QemuOpts *opts;
2376             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2377                                            optarg, true);
2378             if (!opts) {
2379                 return 1;
2380             }
2381         }   break;
2382         case OPTION_IMAGE_OPTS:
2383             image_opts = true;
2384             break;
2385         }
2386     }
2387     if (optind != argc - 1) {
2388         error_exit("Expecting one image file name");
2389     }
2390     filename = argv[optind++];
2391 
2392     if (output && !strcmp(output, "json")) {
2393         output_format = OFORMAT_JSON;
2394     } else if (output && !strcmp(output, "human")) {
2395         output_format = OFORMAT_HUMAN;
2396     } else if (output) {
2397         error_report("--output must be used with human or json as argument.");
2398         return 1;
2399     }
2400 
2401     if (qemu_opts_foreach(&qemu_object_opts,
2402                           user_creatable_add_opts_foreach,
2403                           NULL, NULL)) {
2404         return 1;
2405     }
2406 
2407     list = collect_image_info_list(image_opts, filename, fmt, chain);
2408     if (!list) {
2409         return 1;
2410     }
2411 
2412     switch (output_format) {
2413     case OFORMAT_HUMAN:
2414         dump_human_image_info_list(list);
2415         break;
2416     case OFORMAT_JSON:
2417         if (chain) {
2418             dump_json_image_info_list(list);
2419         } else {
2420             dump_json_image_info(list->value);
2421         }
2422         break;
2423     }
2424 
2425     qapi_free_ImageInfoList(list);
2426     return 0;
2427 }
2428 
2429 static void dump_map_entry(OutputFormat output_format, MapEntry *e,
2430                            MapEntry *next)
2431 {
2432     switch (output_format) {
2433     case OFORMAT_HUMAN:
2434         if (e->data && !e->has_offset) {
2435             error_report("File contains external, encrypted or compressed clusters.");
2436             exit(1);
2437         }
2438         if (e->data && !e->zero) {
2439             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
2440                    e->start, e->length,
2441                    e->has_offset ? e->offset : 0,
2442                    e->has_filename ? e->filename : "");
2443         }
2444         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
2445          * Modify the flags here to allow more coalescing.
2446          */
2447         if (next && (!next->data || next->zero)) {
2448             next->data = false;
2449             next->zero = true;
2450         }
2451         break;
2452     case OFORMAT_JSON:
2453         printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
2454                " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
2455                (e->start == 0 ? "[" : ",\n"),
2456                e->start, e->length, e->depth,
2457                e->zero ? "true" : "false",
2458                e->data ? "true" : "false");
2459         if (e->has_offset) {
2460             printf(", \"offset\": %"PRId64"", e->offset);
2461         }
2462         putchar('}');
2463 
2464         if (!next) {
2465             printf("]\n");
2466         }
2467         break;
2468     }
2469 }
2470 
2471 static int get_block_status(BlockDriverState *bs, int64_t sector_num,
2472                             int nb_sectors, MapEntry *e)
2473 {
2474     int64_t ret;
2475     int depth;
2476     BlockDriverState *file;
2477     bool has_offset;
2478 
2479     /* As an optimization, we could cache the current range of unallocated
2480      * clusters in each file of the chain, and avoid querying the same
2481      * range repeatedly.
2482      */
2483 
2484     depth = 0;
2485     for (;;) {
2486         ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &nb_sectors,
2487                                     &file);
2488         if (ret < 0) {
2489             return ret;
2490         }
2491         assert(nb_sectors);
2492         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
2493             break;
2494         }
2495         bs = backing_bs(bs);
2496         if (bs == NULL) {
2497             ret = 0;
2498             break;
2499         }
2500 
2501         depth++;
2502     }
2503 
2504     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
2505 
2506     *e = (MapEntry) {
2507         .start = sector_num * BDRV_SECTOR_SIZE,
2508         .length = nb_sectors * BDRV_SECTOR_SIZE,
2509         .data = !!(ret & BDRV_BLOCK_DATA),
2510         .zero = !!(ret & BDRV_BLOCK_ZERO),
2511         .offset = ret & BDRV_BLOCK_OFFSET_MASK,
2512         .has_offset = has_offset,
2513         .depth = depth,
2514         .has_filename = file && has_offset,
2515         .filename = file && has_offset ? file->filename : NULL,
2516     };
2517 
2518     return 0;
2519 }
2520 
2521 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
2522 {
2523     if (curr->length == 0) {
2524         return false;
2525     }
2526     if (curr->zero != next->zero ||
2527         curr->data != next->data ||
2528         curr->depth != next->depth ||
2529         curr->has_filename != next->has_filename ||
2530         curr->has_offset != next->has_offset) {
2531         return false;
2532     }
2533     if (curr->has_filename && strcmp(curr->filename, next->filename)) {
2534         return false;
2535     }
2536     if (curr->has_offset && curr->offset + curr->length != next->offset) {
2537         return false;
2538     }
2539     return true;
2540 }
2541 
2542 static int img_map(int argc, char **argv)
2543 {
2544     int c;
2545     OutputFormat output_format = OFORMAT_HUMAN;
2546     BlockBackend *blk;
2547     BlockDriverState *bs;
2548     const char *filename, *fmt, *output;
2549     int64_t length;
2550     MapEntry curr = { .length = 0 }, next;
2551     int ret = 0;
2552     bool image_opts = false;
2553 
2554     fmt = NULL;
2555     output = NULL;
2556     for (;;) {
2557         int option_index = 0;
2558         static const struct option long_options[] = {
2559             {"help", no_argument, 0, 'h'},
2560             {"format", required_argument, 0, 'f'},
2561             {"output", required_argument, 0, OPTION_OUTPUT},
2562             {"object", required_argument, 0, OPTION_OBJECT},
2563             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2564             {0, 0, 0, 0}
2565         };
2566         c = getopt_long(argc, argv, "f:h",
2567                         long_options, &option_index);
2568         if (c == -1) {
2569             break;
2570         }
2571         switch (c) {
2572         case '?':
2573         case 'h':
2574             help();
2575             break;
2576         case 'f':
2577             fmt = optarg;
2578             break;
2579         case OPTION_OUTPUT:
2580             output = optarg;
2581             break;
2582         case OPTION_OBJECT: {
2583             QemuOpts *opts;
2584             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2585                                            optarg, true);
2586             if (!opts) {
2587                 return 1;
2588             }
2589         }   break;
2590         case OPTION_IMAGE_OPTS:
2591             image_opts = true;
2592             break;
2593         }
2594     }
2595     if (optind != argc - 1) {
2596         error_exit("Expecting one image file name");
2597     }
2598     filename = argv[optind];
2599 
2600     if (output && !strcmp(output, "json")) {
2601         output_format = OFORMAT_JSON;
2602     } else if (output && !strcmp(output, "human")) {
2603         output_format = OFORMAT_HUMAN;
2604     } else if (output) {
2605         error_report("--output must be used with human or json as argument.");
2606         return 1;
2607     }
2608 
2609     if (qemu_opts_foreach(&qemu_object_opts,
2610                           user_creatable_add_opts_foreach,
2611                           NULL, NULL)) {
2612         return 1;
2613     }
2614 
2615     blk = img_open(image_opts, filename, fmt, 0, false, false);
2616     if (!blk) {
2617         return 1;
2618     }
2619     bs = blk_bs(blk);
2620 
2621     if (output_format == OFORMAT_HUMAN) {
2622         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
2623     }
2624 
2625     length = blk_getlength(blk);
2626     while (curr.start + curr.length < length) {
2627         int64_t nsectors_left;
2628         int64_t sector_num;
2629         int n;
2630 
2631         sector_num = (curr.start + curr.length) >> BDRV_SECTOR_BITS;
2632 
2633         /* Probe up to 1 GiB at a time.  */
2634         nsectors_left = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE) - sector_num;
2635         n = MIN(1 << (30 - BDRV_SECTOR_BITS), nsectors_left);
2636         ret = get_block_status(bs, sector_num, n, &next);
2637 
2638         if (ret < 0) {
2639             error_report("Could not read file metadata: %s", strerror(-ret));
2640             goto out;
2641         }
2642 
2643         if (entry_mergeable(&curr, &next)) {
2644             curr.length += next.length;
2645             continue;
2646         }
2647 
2648         if (curr.length > 0) {
2649             dump_map_entry(output_format, &curr, &next);
2650         }
2651         curr = next;
2652     }
2653 
2654     dump_map_entry(output_format, &curr, NULL);
2655 
2656 out:
2657     blk_unref(blk);
2658     return ret < 0;
2659 }
2660 
2661 #define SNAPSHOT_LIST   1
2662 #define SNAPSHOT_CREATE 2
2663 #define SNAPSHOT_APPLY  3
2664 #define SNAPSHOT_DELETE 4
2665 
2666 static int img_snapshot(int argc, char **argv)
2667 {
2668     BlockBackend *blk;
2669     BlockDriverState *bs;
2670     QEMUSnapshotInfo sn;
2671     char *filename, *snapshot_name = NULL;
2672     int c, ret = 0, bdrv_oflags;
2673     int action = 0;
2674     qemu_timeval tv;
2675     bool quiet = false;
2676     Error *err = NULL;
2677     bool image_opts = false;
2678 
2679     bdrv_oflags = BDRV_O_RDWR;
2680     /* Parse commandline parameters */
2681     for(;;) {
2682         static const struct option long_options[] = {
2683             {"help", no_argument, 0, 'h'},
2684             {"object", required_argument, 0, OPTION_OBJECT},
2685             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2686             {0, 0, 0, 0}
2687         };
2688         c = getopt_long(argc, argv, "la:c:d:hq",
2689                         long_options, NULL);
2690         if (c == -1) {
2691             break;
2692         }
2693         switch(c) {
2694         case '?':
2695         case 'h':
2696             help();
2697             return 0;
2698         case 'l':
2699             if (action) {
2700                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2701                 return 0;
2702             }
2703             action = SNAPSHOT_LIST;
2704             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
2705             break;
2706         case 'a':
2707             if (action) {
2708                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2709                 return 0;
2710             }
2711             action = SNAPSHOT_APPLY;
2712             snapshot_name = optarg;
2713             break;
2714         case 'c':
2715             if (action) {
2716                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2717                 return 0;
2718             }
2719             action = SNAPSHOT_CREATE;
2720             snapshot_name = optarg;
2721             break;
2722         case 'd':
2723             if (action) {
2724                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
2725                 return 0;
2726             }
2727             action = SNAPSHOT_DELETE;
2728             snapshot_name = optarg;
2729             break;
2730         case 'q':
2731             quiet = true;
2732             break;
2733         case OPTION_OBJECT: {
2734             QemuOpts *opts;
2735             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2736                                            optarg, true);
2737             if (!opts) {
2738                 return 1;
2739             }
2740         }   break;
2741         case OPTION_IMAGE_OPTS:
2742             image_opts = true;
2743             break;
2744         }
2745     }
2746 
2747     if (optind != argc - 1) {
2748         error_exit("Expecting one image file name");
2749     }
2750     filename = argv[optind++];
2751 
2752     if (qemu_opts_foreach(&qemu_object_opts,
2753                           user_creatable_add_opts_foreach,
2754                           NULL, NULL)) {
2755         return 1;
2756     }
2757 
2758     /* Open the image */
2759     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet);
2760     if (!blk) {
2761         return 1;
2762     }
2763     bs = blk_bs(blk);
2764 
2765     /* Perform the requested action */
2766     switch(action) {
2767     case SNAPSHOT_LIST:
2768         dump_snapshots(bs);
2769         break;
2770 
2771     case SNAPSHOT_CREATE:
2772         memset(&sn, 0, sizeof(sn));
2773         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
2774 
2775         qemu_gettimeofday(&tv);
2776         sn.date_sec = tv.tv_sec;
2777         sn.date_nsec = tv.tv_usec * 1000;
2778 
2779         ret = bdrv_snapshot_create(bs, &sn);
2780         if (ret) {
2781             error_report("Could not create snapshot '%s': %d (%s)",
2782                 snapshot_name, ret, strerror(-ret));
2783         }
2784         break;
2785 
2786     case SNAPSHOT_APPLY:
2787         ret = bdrv_snapshot_goto(bs, snapshot_name);
2788         if (ret) {
2789             error_report("Could not apply snapshot '%s': %d (%s)",
2790                 snapshot_name, ret, strerror(-ret));
2791         }
2792         break;
2793 
2794     case SNAPSHOT_DELETE:
2795         bdrv_snapshot_delete_by_id_or_name(bs, snapshot_name, &err);
2796         if (err) {
2797             error_reportf_err(err, "Could not delete snapshot '%s': ",
2798                               snapshot_name);
2799             ret = 1;
2800         }
2801         break;
2802     }
2803 
2804     /* Cleanup */
2805     blk_unref(blk);
2806     if (ret) {
2807         return 1;
2808     }
2809     return 0;
2810 }
2811 
2812 static int img_rebase(int argc, char **argv)
2813 {
2814     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
2815     uint8_t *buf_old = NULL;
2816     uint8_t *buf_new = NULL;
2817     BlockDriverState *bs = NULL;
2818     char *filename;
2819     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
2820     int c, flags, src_flags, ret;
2821     bool writethrough, src_writethrough;
2822     int unsafe = 0;
2823     int progress = 0;
2824     bool quiet = false;
2825     Error *local_err = NULL;
2826     bool image_opts = false;
2827 
2828     /* Parse commandline parameters */
2829     fmt = NULL;
2830     cache = BDRV_DEFAULT_CACHE;
2831     src_cache = BDRV_DEFAULT_CACHE;
2832     out_baseimg = NULL;
2833     out_basefmt = NULL;
2834     for(;;) {
2835         static const struct option long_options[] = {
2836             {"help", no_argument, 0, 'h'},
2837             {"object", required_argument, 0, OPTION_OBJECT},
2838             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2839             {0, 0, 0, 0}
2840         };
2841         c = getopt_long(argc, argv, "hf:F:b:upt:T:q",
2842                         long_options, NULL);
2843         if (c == -1) {
2844             break;
2845         }
2846         switch(c) {
2847         case '?':
2848         case 'h':
2849             help();
2850             return 0;
2851         case 'f':
2852             fmt = optarg;
2853             break;
2854         case 'F':
2855             out_basefmt = optarg;
2856             break;
2857         case 'b':
2858             out_baseimg = optarg;
2859             break;
2860         case 'u':
2861             unsafe = 1;
2862             break;
2863         case 'p':
2864             progress = 1;
2865             break;
2866         case 't':
2867             cache = optarg;
2868             break;
2869         case 'T':
2870             src_cache = optarg;
2871             break;
2872         case 'q':
2873             quiet = true;
2874             break;
2875         case OPTION_OBJECT: {
2876             QemuOpts *opts;
2877             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2878                                            optarg, true);
2879             if (!opts) {
2880                 return 1;
2881             }
2882         }   break;
2883         case OPTION_IMAGE_OPTS:
2884             image_opts = true;
2885             break;
2886         }
2887     }
2888 
2889     if (quiet) {
2890         progress = 0;
2891     }
2892 
2893     if (optind != argc - 1) {
2894         error_exit("Expecting one image file name");
2895     }
2896     if (!unsafe && !out_baseimg) {
2897         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
2898     }
2899     filename = argv[optind++];
2900 
2901     if (qemu_opts_foreach(&qemu_object_opts,
2902                           user_creatable_add_opts_foreach,
2903                           NULL, NULL)) {
2904         return 1;
2905     }
2906 
2907     qemu_progress_init(progress, 2.0);
2908     qemu_progress_print(0, 100);
2909 
2910     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
2911     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2912     if (ret < 0) {
2913         error_report("Invalid cache option: %s", cache);
2914         goto out;
2915     }
2916 
2917     src_flags = 0;
2918     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2919     if (ret < 0) {
2920         error_report("Invalid source cache option: %s", src_cache);
2921         goto out;
2922     }
2923 
2924     /* The source files are opened read-only, don't care about WCE */
2925     assert((src_flags & BDRV_O_RDWR) == 0);
2926     (void) src_writethrough;
2927 
2928     /*
2929      * Open the images.
2930      *
2931      * Ignore the old backing file for unsafe rebase in case we want to correct
2932      * the reference to a renamed or moved backing file.
2933      */
2934     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
2935     if (!blk) {
2936         ret = -1;
2937         goto out;
2938     }
2939     bs = blk_bs(blk);
2940 
2941     if (out_basefmt != NULL) {
2942         if (bdrv_find_format(out_basefmt) == NULL) {
2943             error_report("Invalid format name: '%s'", out_basefmt);
2944             ret = -1;
2945             goto out;
2946         }
2947     }
2948 
2949     /* For safe rebasing we need to compare old and new backing file */
2950     if (!unsafe) {
2951         char backing_name[PATH_MAX];
2952         QDict *options = NULL;
2953 
2954         if (bs->backing_format[0] != '\0') {
2955             options = qdict_new();
2956             qdict_put(options, "driver", qstring_from_str(bs->backing_format));
2957         }
2958 
2959         bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
2960         blk_old_backing = blk_new_open(backing_name, NULL,
2961                                        options, src_flags, &local_err);
2962         if (!blk_old_backing) {
2963             error_reportf_err(local_err,
2964                               "Could not open old backing file '%s': ",
2965                               backing_name);
2966             ret = -1;
2967             goto out;
2968         }
2969 
2970         if (out_baseimg[0]) {
2971             if (out_basefmt) {
2972                 options = qdict_new();
2973                 qdict_put(options, "driver", qstring_from_str(out_basefmt));
2974             } else {
2975                 options = NULL;
2976             }
2977 
2978             blk_new_backing = blk_new_open(out_baseimg, NULL,
2979                                            options, src_flags, &local_err);
2980             if (!blk_new_backing) {
2981                 error_reportf_err(local_err,
2982                                   "Could not open new backing file '%s': ",
2983                                   out_baseimg);
2984                 ret = -1;
2985                 goto out;
2986             }
2987         }
2988     }
2989 
2990     /*
2991      * Check each unallocated cluster in the COW file. If it is unallocated,
2992      * accesses go to the backing file. We must therefore compare this cluster
2993      * in the old and new backing file, and if they differ we need to copy it
2994      * from the old backing file into the COW file.
2995      *
2996      * If qemu-img crashes during this step, no harm is done. The content of
2997      * the image is the same as the original one at any time.
2998      */
2999     if (!unsafe) {
3000         int64_t num_sectors;
3001         int64_t old_backing_num_sectors;
3002         int64_t new_backing_num_sectors = 0;
3003         uint64_t sector;
3004         int n;
3005         float local_progress = 0;
3006 
3007         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3008         buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3009 
3010         num_sectors = blk_nb_sectors(blk);
3011         if (num_sectors < 0) {
3012             error_report("Could not get size of '%s': %s",
3013                          filename, strerror(-num_sectors));
3014             ret = -1;
3015             goto out;
3016         }
3017         old_backing_num_sectors = blk_nb_sectors(blk_old_backing);
3018         if (old_backing_num_sectors < 0) {
3019             char backing_name[PATH_MAX];
3020 
3021             bdrv_get_backing_filename(bs, backing_name, sizeof(backing_name));
3022             error_report("Could not get size of '%s': %s",
3023                          backing_name, strerror(-old_backing_num_sectors));
3024             ret = -1;
3025             goto out;
3026         }
3027         if (blk_new_backing) {
3028             new_backing_num_sectors = blk_nb_sectors(blk_new_backing);
3029             if (new_backing_num_sectors < 0) {
3030                 error_report("Could not get size of '%s': %s",
3031                              out_baseimg, strerror(-new_backing_num_sectors));
3032                 ret = -1;
3033                 goto out;
3034             }
3035         }
3036 
3037         if (num_sectors != 0) {
3038             local_progress = (float)100 /
3039                 (num_sectors / MIN(num_sectors, IO_BUF_SIZE / 512));
3040         }
3041 
3042         for (sector = 0; sector < num_sectors; sector += n) {
3043 
3044             /* How many sectors can we handle with the next read? */
3045             if (sector + (IO_BUF_SIZE / 512) <= num_sectors) {
3046                 n = (IO_BUF_SIZE / 512);
3047             } else {
3048                 n = num_sectors - sector;
3049             }
3050 
3051             /* If the cluster is allocated, we don't need to take action */
3052             ret = bdrv_is_allocated(bs, sector, n, &n);
3053             if (ret < 0) {
3054                 error_report("error while reading image metadata: %s",
3055                              strerror(-ret));
3056                 goto out;
3057             }
3058             if (ret) {
3059                 continue;
3060             }
3061 
3062             /*
3063              * Read old and new backing file and take into consideration that
3064              * backing files may be smaller than the COW image.
3065              */
3066             if (sector >= old_backing_num_sectors) {
3067                 memset(buf_old, 0, n * BDRV_SECTOR_SIZE);
3068             } else {
3069                 if (sector + n > old_backing_num_sectors) {
3070                     n = old_backing_num_sectors - sector;
3071                 }
3072 
3073                 ret = blk_pread(blk_old_backing, sector << BDRV_SECTOR_BITS,
3074                                 buf_old, n << BDRV_SECTOR_BITS);
3075                 if (ret < 0) {
3076                     error_report("error while reading from old backing file");
3077                     goto out;
3078                 }
3079             }
3080 
3081             if (sector >= new_backing_num_sectors || !blk_new_backing) {
3082                 memset(buf_new, 0, n * BDRV_SECTOR_SIZE);
3083             } else {
3084                 if (sector + n > new_backing_num_sectors) {
3085                     n = new_backing_num_sectors - sector;
3086                 }
3087 
3088                 ret = blk_pread(blk_new_backing, sector << BDRV_SECTOR_BITS,
3089                                 buf_new, n << BDRV_SECTOR_BITS);
3090                 if (ret < 0) {
3091                     error_report("error while reading from new backing file");
3092                     goto out;
3093                 }
3094             }
3095 
3096             /* If they differ, we need to write to the COW file */
3097             uint64_t written = 0;
3098 
3099             while (written < n) {
3100                 int pnum;
3101 
3102                 if (compare_sectors(buf_old + written * 512,
3103                     buf_new + written * 512, n - written, &pnum))
3104                 {
3105                     ret = blk_pwrite(blk,
3106                                      (sector + written) << BDRV_SECTOR_BITS,
3107                                      buf_old + written * 512,
3108                                      pnum << BDRV_SECTOR_BITS, 0);
3109                     if (ret < 0) {
3110                         error_report("Error while writing to COW image: %s",
3111                             strerror(-ret));
3112                         goto out;
3113                     }
3114                 }
3115 
3116                 written += pnum;
3117             }
3118             qemu_progress_print(local_progress, 100);
3119         }
3120     }
3121 
3122     /*
3123      * Change the backing file. All clusters that are different from the old
3124      * backing file are overwritten in the COW file now, so the visible content
3125      * doesn't change when we switch the backing file.
3126      */
3127     if (out_baseimg && *out_baseimg) {
3128         ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
3129     } else {
3130         ret = bdrv_change_backing_file(bs, NULL, NULL);
3131     }
3132 
3133     if (ret == -ENOSPC) {
3134         error_report("Could not change the backing file to '%s': No "
3135                      "space left in the file header", out_baseimg);
3136     } else if (ret < 0) {
3137         error_report("Could not change the backing file to '%s': %s",
3138             out_baseimg, strerror(-ret));
3139     }
3140 
3141     qemu_progress_print(100, 0);
3142     /*
3143      * TODO At this point it is possible to check if any clusters that are
3144      * allocated in the COW file are the same in the backing file. If so, they
3145      * could be dropped from the COW file. Don't do this before switching the
3146      * backing file, in case of a crash this would lead to corruption.
3147      */
3148 out:
3149     qemu_progress_end();
3150     /* Cleanup */
3151     if (!unsafe) {
3152         blk_unref(blk_old_backing);
3153         blk_unref(blk_new_backing);
3154     }
3155     qemu_vfree(buf_old);
3156     qemu_vfree(buf_new);
3157 
3158     blk_unref(blk);
3159     if (ret) {
3160         return 1;
3161     }
3162     return 0;
3163 }
3164 
3165 static int img_resize(int argc, char **argv)
3166 {
3167     Error *err = NULL;
3168     int c, ret, relative;
3169     const char *filename, *fmt, *size;
3170     int64_t n, total_size;
3171     bool quiet = false;
3172     BlockBackend *blk = NULL;
3173     QemuOpts *param;
3174 
3175     static QemuOptsList resize_options = {
3176         .name = "resize_options",
3177         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3178         .desc = {
3179             {
3180                 .name = BLOCK_OPT_SIZE,
3181                 .type = QEMU_OPT_SIZE,
3182                 .help = "Virtual disk size"
3183             }, {
3184                 /* end of list */
3185             }
3186         },
3187     };
3188     bool image_opts = false;
3189 
3190     /* Remove size from argv manually so that negative numbers are not treated
3191      * as options by getopt. */
3192     if (argc < 3) {
3193         error_exit("Not enough arguments");
3194         return 1;
3195     }
3196 
3197     size = argv[--argc];
3198 
3199     /* Parse getopt arguments */
3200     fmt = NULL;
3201     for(;;) {
3202         static const struct option long_options[] = {
3203             {"help", no_argument, 0, 'h'},
3204             {"object", required_argument, 0, OPTION_OBJECT},
3205             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3206             {0, 0, 0, 0}
3207         };
3208         c = getopt_long(argc, argv, "f:hq",
3209                         long_options, NULL);
3210         if (c == -1) {
3211             break;
3212         }
3213         switch(c) {
3214         case '?':
3215         case 'h':
3216             help();
3217             break;
3218         case 'f':
3219             fmt = optarg;
3220             break;
3221         case 'q':
3222             quiet = true;
3223             break;
3224         case OPTION_OBJECT: {
3225             QemuOpts *opts;
3226             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3227                                            optarg, true);
3228             if (!opts) {
3229                 return 1;
3230             }
3231         }   break;
3232         case OPTION_IMAGE_OPTS:
3233             image_opts = true;
3234             break;
3235         }
3236     }
3237     if (optind != argc - 1) {
3238         error_exit("Expecting one image file name");
3239     }
3240     filename = argv[optind++];
3241 
3242     if (qemu_opts_foreach(&qemu_object_opts,
3243                           user_creatable_add_opts_foreach,
3244                           NULL, NULL)) {
3245         return 1;
3246     }
3247 
3248     /* Choose grow, shrink, or absolute resize mode */
3249     switch (size[0]) {
3250     case '+':
3251         relative = 1;
3252         size++;
3253         break;
3254     case '-':
3255         relative = -1;
3256         size++;
3257         break;
3258     default:
3259         relative = 0;
3260         break;
3261     }
3262 
3263     /* Parse size */
3264     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
3265     qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
3266     if (err) {
3267         error_report_err(err);
3268         ret = -1;
3269         qemu_opts_del(param);
3270         goto out;
3271     }
3272     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
3273     qemu_opts_del(param);
3274 
3275     blk = img_open(image_opts, filename, fmt,
3276                    BDRV_O_RDWR, false, quiet);
3277     if (!blk) {
3278         ret = -1;
3279         goto out;
3280     }
3281 
3282     if (relative) {
3283         total_size = blk_getlength(blk) + n * relative;
3284     } else {
3285         total_size = n;
3286     }
3287     if (total_size <= 0) {
3288         error_report("New image size must be positive");
3289         ret = -1;
3290         goto out;
3291     }
3292 
3293     ret = blk_truncate(blk, total_size);
3294     switch (ret) {
3295     case 0:
3296         qprintf(quiet, "Image resized.\n");
3297         break;
3298     case -ENOTSUP:
3299         error_report("This image does not support resize");
3300         break;
3301     case -EACCES:
3302         error_report("Image is read-only");
3303         break;
3304     default:
3305         error_report("Error resizing image: %s", strerror(-ret));
3306         break;
3307     }
3308 out:
3309     blk_unref(blk);
3310     if (ret) {
3311         return 1;
3312     }
3313     return 0;
3314 }
3315 
3316 static void amend_status_cb(BlockDriverState *bs,
3317                             int64_t offset, int64_t total_work_size,
3318                             void *opaque)
3319 {
3320     qemu_progress_print(100.f * offset / total_work_size, 0);
3321 }
3322 
3323 static int img_amend(int argc, char **argv)
3324 {
3325     Error *err = NULL;
3326     int c, ret = 0;
3327     char *options = NULL;
3328     QemuOptsList *create_opts = NULL;
3329     QemuOpts *opts = NULL;
3330     const char *fmt = NULL, *filename, *cache;
3331     int flags;
3332     bool writethrough;
3333     bool quiet = false, progress = false;
3334     BlockBackend *blk = NULL;
3335     BlockDriverState *bs = NULL;
3336     bool image_opts = false;
3337 
3338     cache = BDRV_DEFAULT_CACHE;
3339     for (;;) {
3340         static const struct option long_options[] = {
3341             {"help", no_argument, 0, 'h'},
3342             {"object", required_argument, 0, OPTION_OBJECT},
3343             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3344             {0, 0, 0, 0}
3345         };
3346         c = getopt_long(argc, argv, "ho:f:t:pq",
3347                         long_options, NULL);
3348         if (c == -1) {
3349             break;
3350         }
3351 
3352         switch (c) {
3353             case 'h':
3354             case '?':
3355                 help();
3356                 break;
3357             case 'o':
3358                 if (!is_valid_option_list(optarg)) {
3359                     error_report("Invalid option list: %s", optarg);
3360                     ret = -1;
3361                     goto out_no_progress;
3362                 }
3363                 if (!options) {
3364                     options = g_strdup(optarg);
3365                 } else {
3366                     char *old_options = options;
3367                     options = g_strdup_printf("%s,%s", options, optarg);
3368                     g_free(old_options);
3369                 }
3370                 break;
3371             case 'f':
3372                 fmt = optarg;
3373                 break;
3374             case 't':
3375                 cache = optarg;
3376                 break;
3377             case 'p':
3378                 progress = true;
3379                 break;
3380             case 'q':
3381                 quiet = true;
3382                 break;
3383             case OPTION_OBJECT:
3384                 opts = qemu_opts_parse_noisily(&qemu_object_opts,
3385                                                optarg, true);
3386                 if (!opts) {
3387                     ret = -1;
3388                     goto out_no_progress;
3389                 }
3390                 break;
3391             case OPTION_IMAGE_OPTS:
3392                 image_opts = true;
3393                 break;
3394         }
3395     }
3396 
3397     if (!options) {
3398         error_exit("Must specify options (-o)");
3399     }
3400 
3401     if (qemu_opts_foreach(&qemu_object_opts,
3402                           user_creatable_add_opts_foreach,
3403                           NULL, NULL)) {
3404         ret = -1;
3405         goto out_no_progress;
3406     }
3407 
3408     if (quiet) {
3409         progress = false;
3410     }
3411     qemu_progress_init(progress, 1.0);
3412 
3413     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
3414     if (fmt && has_help_option(options)) {
3415         /* If a format is explicitly specified (and possibly no filename is
3416          * given), print option help here */
3417         ret = print_block_option_help(filename, fmt);
3418         goto out;
3419     }
3420 
3421     if (optind != argc - 1) {
3422         error_report("Expecting one image file name");
3423         ret = -1;
3424         goto out;
3425     }
3426 
3427     flags = BDRV_O_RDWR;
3428     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3429     if (ret < 0) {
3430         error_report("Invalid cache option: %s", cache);
3431         goto out;
3432     }
3433 
3434     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
3435     if (!blk) {
3436         ret = -1;
3437         goto out;
3438     }
3439     bs = blk_bs(blk);
3440 
3441     fmt = bs->drv->format_name;
3442 
3443     if (has_help_option(options)) {
3444         /* If the format was auto-detected, print option help here */
3445         ret = print_block_option_help(filename, fmt);
3446         goto out;
3447     }
3448 
3449     if (!bs->drv->create_opts) {
3450         error_report("Format driver '%s' does not support any options to amend",
3451                      fmt);
3452         ret = -1;
3453         goto out;
3454     }
3455 
3456     create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
3457     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3458     qemu_opts_do_parse(opts, options, NULL, &err);
3459     if (err) {
3460         error_report_err(err);
3461         ret = -1;
3462         goto out;
3463     }
3464 
3465     /* In case the driver does not call amend_status_cb() */
3466     qemu_progress_print(0.f, 0);
3467     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL);
3468     qemu_progress_print(100.f, 0);
3469     if (ret < 0) {
3470         error_report("Error while amending options: %s", strerror(-ret));
3471         goto out;
3472     }
3473 
3474 out:
3475     qemu_progress_end();
3476 
3477 out_no_progress:
3478     blk_unref(blk);
3479     qemu_opts_del(opts);
3480     qemu_opts_free(create_opts);
3481     g_free(options);
3482 
3483     if (ret) {
3484         return 1;
3485     }
3486     return 0;
3487 }
3488 
3489 typedef struct BenchData {
3490     BlockBackend *blk;
3491     uint64_t image_size;
3492     bool write;
3493     int bufsize;
3494     int step;
3495     int nrreq;
3496     int n;
3497     int flush_interval;
3498     bool drain_on_flush;
3499     uint8_t *buf;
3500     QEMUIOVector *qiov;
3501 
3502     int in_flight;
3503     bool in_flush;
3504     uint64_t offset;
3505 } BenchData;
3506 
/*
 * Completion callback for flushes issued without draining the queue first.
 * A successful flush needs no follow-up; a failed one ends the program.
 * @opaque is not used.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
3514 
3515 static void bench_cb(void *opaque, int ret)
3516 {
3517     BenchData *b = opaque;
3518     BlockAIOCB *acb;
3519 
3520     if (ret < 0) {
3521         error_report("Failed request: %s", strerror(-ret));
3522         exit(EXIT_FAILURE);
3523     }
3524 
3525     if (b->in_flush) {
3526         /* Just finished a flush with drained queue: Start next requests */
3527         assert(b->in_flight == 0);
3528         b->in_flush = false;
3529     } else if (b->in_flight > 0) {
3530         int remaining = b->n - b->in_flight;
3531 
3532         b->n--;
3533         b->in_flight--;
3534 
3535         /* Time for flush? Drain queue if requested, then flush */
3536         if (b->flush_interval && remaining % b->flush_interval == 0) {
3537             if (!b->in_flight || !b->drain_on_flush) {
3538                 BlockCompletionFunc *cb;
3539 
3540                 if (b->drain_on_flush) {
3541                     b->in_flush = true;
3542                     cb = bench_cb;
3543                 } else {
3544                     cb = bench_undrained_flush_cb;
3545                 }
3546 
3547                 acb = blk_aio_flush(b->blk, cb, b);
3548                 if (!acb) {
3549                     error_report("Failed to issue flush request");
3550                     exit(EXIT_FAILURE);
3551                 }
3552             }
3553             if (b->drain_on_flush) {
3554                 return;
3555             }
3556         }
3557     }
3558 
3559     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
3560         int64_t offset = b->offset;
3561         /* blk_aio_* might look for completed I/Os and kick bench_cb
3562          * again, so make sure this operation is counted by in_flight
3563          * and b->offset is ready for the next submission.
3564          */
3565         b->in_flight++;
3566         b->offset += b->step;
3567         b->offset %= b->image_size;
3568         if (b->write) {
3569             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
3570         } else {
3571             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
3572         }
3573         if (!acb) {
3574             error_report("Failed to issue request");
3575             exit(EXIT_FAILURE);
3576         }
3577     }
3578 }
3579 
3580 static int img_bench(int argc, char **argv)
3581 {
3582     int c, ret = 0;
3583     const char *fmt = NULL, *filename;
3584     bool quiet = false;
3585     bool image_opts = false;
3586     bool is_write = false;
3587     int count = 75000;
3588     int depth = 64;
3589     int64_t offset = 0;
3590     size_t bufsize = 4096;
3591     int pattern = 0;
3592     size_t step = 0;
3593     int flush_interval = 0;
3594     bool drain_on_flush = true;
3595     int64_t image_size;
3596     BlockBackend *blk = NULL;
3597     BenchData data = {};
3598     int flags = 0;
3599     bool writethrough = false;
3600     struct timeval t1, t2;
3601     int i;
3602 
3603     for (;;) {
3604         static const struct option long_options[] = {
3605             {"help", no_argument, 0, 'h'},
3606             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
3607             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3608             {"pattern", required_argument, 0, OPTION_PATTERN},
3609             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
3610             {0, 0, 0, 0}
3611         };
3612         c = getopt_long(argc, argv, "hc:d:f:no:qs:S:t:w", long_options, NULL);
3613         if (c == -1) {
3614             break;
3615         }
3616 
3617         switch (c) {
3618         case 'h':
3619         case '?':
3620             help();
3621             break;
3622         case 'c':
3623         {
3624             char *end;
3625             errno = 0;
3626             count = strtoul(optarg, &end, 0);
3627             if (errno || *end || count > INT_MAX) {
3628                 error_report("Invalid request count specified");
3629                 return 1;
3630             }
3631             break;
3632         }
3633         case 'd':
3634         {
3635             char *end;
3636             errno = 0;
3637             depth = strtoul(optarg, &end, 0);
3638             if (errno || *end || depth > INT_MAX) {
3639                 error_report("Invalid queue depth specified");
3640                 return 1;
3641             }
3642             break;
3643         }
3644         case 'f':
3645             fmt = optarg;
3646             break;
3647         case 'n':
3648             flags |= BDRV_O_NATIVE_AIO;
3649             break;
3650         case 'o':
3651         {
3652             char *end;
3653             errno = 0;
3654             offset = qemu_strtosz_suffix(optarg, &end,
3655                                          QEMU_STRTOSZ_DEFSUFFIX_B);
3656             if (offset < 0|| *end) {
3657                 error_report("Invalid offset specified");
3658                 return 1;
3659             }
3660             break;
3661         }
3662             break;
3663         case 'q':
3664             quiet = true;
3665             break;
3666         case 's':
3667         {
3668             int64_t sval;
3669             char *end;
3670 
3671             sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
3672             if (sval < 0 || sval > INT_MAX || *end) {
3673                 error_report("Invalid buffer size specified");
3674                 return 1;
3675             }
3676 
3677             bufsize = sval;
3678             break;
3679         }
3680         case 'S':
3681         {
3682             int64_t sval;
3683             char *end;
3684 
3685             sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
3686             if (sval < 0 || sval > INT_MAX || *end) {
3687                 error_report("Invalid step size specified");
3688                 return 1;
3689             }
3690 
3691             step = sval;
3692             break;
3693         }
3694         case 't':
3695             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
3696             if (ret < 0) {
3697                 error_report("Invalid cache mode");
3698                 ret = -1;
3699                 goto out;
3700             }
3701             break;
3702         case 'w':
3703             flags |= BDRV_O_RDWR;
3704             is_write = true;
3705             break;
3706         case OPTION_PATTERN:
3707         {
3708             char *end;
3709             errno = 0;
3710             pattern = strtoul(optarg, &end, 0);
3711             if (errno || *end || pattern > 0xff) {
3712                 error_report("Invalid pattern byte specified");
3713                 return 1;
3714             }
3715             break;
3716         }
3717         case OPTION_FLUSH_INTERVAL:
3718         {
3719             char *end;
3720             errno = 0;
3721             flush_interval = strtoul(optarg, &end, 0);
3722             if (errno || *end || flush_interval > INT_MAX) {
3723                 error_report("Invalid flush interval specified");
3724                 return 1;
3725             }
3726             break;
3727         }
3728         case OPTION_NO_DRAIN:
3729             drain_on_flush = false;
3730             break;
3731         case OPTION_IMAGE_OPTS:
3732             image_opts = true;
3733             break;
3734         }
3735     }
3736 
3737     if (optind != argc - 1) {
3738         error_exit("Expecting one image file name");
3739     }
3740     filename = argv[argc - 1];
3741 
3742     if (!is_write && flush_interval) {
3743         error_report("--flush-interval is only available in write tests");
3744         ret = -1;
3745         goto out;
3746     }
3747     if (flush_interval && flush_interval < depth) {
3748         error_report("Flush interval can't be smaller than depth");
3749         ret = -1;
3750         goto out;
3751     }
3752 
3753     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet);
3754     if (!blk) {
3755         ret = -1;
3756         goto out;
3757     }
3758 
3759     image_size = blk_getlength(blk);
3760     if (image_size < 0) {
3761         ret = image_size;
3762         goto out;
3763     }
3764 
3765     data = (BenchData) {
3766         .blk            = blk,
3767         .image_size     = image_size,
3768         .bufsize        = bufsize,
3769         .step           = step ?: bufsize,
3770         .nrreq          = depth,
3771         .n              = count,
3772         .offset         = offset,
3773         .write          = is_write,
3774         .flush_interval = flush_interval,
3775         .drain_on_flush = drain_on_flush,
3776     };
3777     printf("Sending %d %s requests, %d bytes each, %d in parallel "
3778            "(starting at offset %" PRId64 ", step size %d)\n",
3779            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
3780            data.offset, data.step);
3781     if (flush_interval) {
3782         printf("Sending flush every %d requests\n", flush_interval);
3783     }
3784 
3785     data.buf = blk_blockalign(blk, data.nrreq * data.bufsize);
3786     memset(data.buf, pattern, data.nrreq * data.bufsize);
3787 
3788     data.qiov = g_new(QEMUIOVector, data.nrreq);
3789     for (i = 0; i < data.nrreq; i++) {
3790         qemu_iovec_init(&data.qiov[i], 1);
3791         qemu_iovec_add(&data.qiov[i],
3792                        data.buf + i * data.bufsize, data.bufsize);
3793     }
3794 
3795     gettimeofday(&t1, NULL);
3796     bench_cb(&data, 0);
3797 
3798     while (data.n > 0) {
3799         main_loop_wait(false);
3800     }
3801     gettimeofday(&t2, NULL);
3802 
3803     printf("Run completed in %3.3f seconds.\n",
3804            (t2.tv_sec - t1.tv_sec)
3805            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
3806 
3807 out:
3808     qemu_vfree(data.buf);
3809     blk_unref(blk);
3810 
3811     if (ret) {
3812         return 1;
3813     }
3814     return 0;
3815 }
3816 
/*
 * Bit flags, one per dd operand.  img_dd() ORs the flag of every operand it
 * has parsed into DdInfo.flags so it can later tell which ones were given.
 */
#define C_BS      0x01  /* bs=    */
#define C_COUNT   0x02  /* count= */
#define C_IF      0x04  /* if=    */
#define C_OF      0x08  /* of=    */
#define C_SKIP    0x10  /* skip=  */
3822 
/* dd state that belongs to neither the input nor the output side */
struct DdInfo {
    /* Bitwise OR of the C_* flags of all operands seen so far */
    unsigned int flags;
    /* Value of the count= operand: number of input blocks to copy */
    int64_t count;
};
3827 
/* Per-side (input or output) parameters of a dd run */
struct DdIo {
    /* Block size in bytes */
    int bsz;
    /* Heap-allocated copy of the file name; released by img_dd() */
    char *filename;
    /* Transfer buffer; released by img_dd() */
    uint8_t *buf;
    /* Start position, counted in blocks of bsz bytes (set by skip=) */
    int64_t offset;
};
3834 
/* One entry of the dd operand table: "name=value" -> parser callback */
struct DdOpts {
    /* Operand name, i.e. the text in front of the '=' */
    const char *name;
    /*
     * Parser for the operand value; returns 0 on success and non-zero if
     * the value was rejected.
     */
    int (*f)(const char *arg, struct DdIo *in, struct DdIo *out,
             struct DdInfo *dd);
    /* C_* flag recorded in DdInfo.flags once the operand has been parsed */
    unsigned int flag;
};
3840 
3841 static int img_dd_bs(const char *arg,
3842                      struct DdIo *in, struct DdIo *out,
3843                      struct DdInfo *dd)
3844 {
3845     char *end;
3846     int64_t res;
3847 
3848     res = qemu_strtosz_suffix(arg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
3849 
3850     if (res <= 0 || res > INT_MAX || *end) {
3851         error_report("invalid number: '%s'", arg);
3852         return 1;
3853     }
3854     in->bsz = out->bsz = res;
3855 
3856     return 0;
3857 }
3858 
3859 static int img_dd_count(const char *arg,
3860                         struct DdIo *in, struct DdIo *out,
3861                         struct DdInfo *dd)
3862 {
3863     char *end;
3864 
3865     dd->count = qemu_strtosz_suffix(arg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
3866 
3867     if (dd->count < 0 || *end) {
3868         error_report("invalid number: '%s'", arg);
3869         return 1;
3870     }
3871 
3872     return 0;
3873 }
3874 
3875 static int img_dd_if(const char *arg,
3876                      struct DdIo *in, struct DdIo *out,
3877                      struct DdInfo *dd)
3878 {
3879     in->filename = g_strdup(arg);
3880 
3881     return 0;
3882 }
3883 
3884 static int img_dd_of(const char *arg,
3885                      struct DdIo *in, struct DdIo *out,
3886                      struct DdInfo *dd)
3887 {
3888     out->filename = g_strdup(arg);
3889 
3890     return 0;
3891 }
3892 
3893 static int img_dd_skip(const char *arg,
3894                        struct DdIo *in, struct DdIo *out,
3895                        struct DdInfo *dd)
3896 {
3897     char *end;
3898 
3899     in->offset = qemu_strtosz_suffix(arg, &end, QEMU_STRTOSZ_DEFSUFFIX_B);
3900 
3901     if (in->offset < 0 || *end) {
3902         error_report("invalid number: '%s'", arg);
3903         return 1;
3904     }
3905 
3906     return 0;
3907 }
3908 
3909 static int img_dd(int argc, char **argv)
3910 {
3911     int ret = 0;
3912     char *arg = NULL;
3913     char *tmp;
3914     BlockDriver *drv = NULL, *proto_drv = NULL;
3915     BlockBackend *blk1 = NULL, *blk2 = NULL;
3916     QemuOpts *opts = NULL;
3917     QemuOptsList *create_opts = NULL;
3918     Error *local_err = NULL;
3919     bool image_opts = false;
3920     int c, i;
3921     const char *out_fmt = "raw";
3922     const char *fmt = NULL;
3923     int64_t size = 0;
3924     int64_t block_count = 0, out_pos, in_pos;
3925     struct DdInfo dd = {
3926         .flags = 0,
3927         .count = 0,
3928     };
3929     struct DdIo in = {
3930         .bsz = 512, /* Block size is by default 512 bytes */
3931         .filename = NULL,
3932         .buf = NULL,
3933         .offset = 0
3934     };
3935     struct DdIo out = {
3936         .bsz = 512,
3937         .filename = NULL,
3938         .buf = NULL,
3939         .offset = 0
3940     };
3941 
3942     const struct DdOpts options[] = {
3943         { "bs", img_dd_bs, C_BS },
3944         { "count", img_dd_count, C_COUNT },
3945         { "if", img_dd_if, C_IF },
3946         { "of", img_dd_of, C_OF },
3947         { "skip", img_dd_skip, C_SKIP },
3948         { NULL, NULL, 0 }
3949     };
3950     const struct option long_options[] = {
3951         { "help", no_argument, 0, 'h'},
3952         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3953         { 0, 0, 0, 0 }
3954     };
3955 
3956     while ((c = getopt_long(argc, argv, "hf:O:", long_options, NULL))) {
3957         if (c == EOF) {
3958             break;
3959         }
3960         switch (c) {
3961         case 'O':
3962             out_fmt = optarg;
3963             break;
3964         case 'f':
3965             fmt = optarg;
3966             break;
3967         case '?':
3968             error_report("Try 'qemu-img --help' for more information.");
3969             ret = -1;
3970             goto out;
3971         case 'h':
3972             help();
3973             break;
3974         case OPTION_IMAGE_OPTS:
3975             image_opts = true;
3976             break;
3977         }
3978     }
3979 
3980     for (i = optind; i < argc; i++) {
3981         int j;
3982         arg = g_strdup(argv[i]);
3983 
3984         tmp = strchr(arg, '=');
3985         if (tmp == NULL) {
3986             error_report("unrecognized operand %s", arg);
3987             ret = -1;
3988             goto out;
3989         }
3990 
3991         *tmp++ = '\0';
3992 
3993         for (j = 0; options[j].name != NULL; j++) {
3994             if (!strcmp(arg, options[j].name)) {
3995                 break;
3996             }
3997         }
3998         if (options[j].name == NULL) {
3999             error_report("unrecognized operand %s", arg);
4000             ret = -1;
4001             goto out;
4002         }
4003 
4004         if (options[j].f(tmp, &in, &out, &dd) != 0) {
4005             ret = -1;
4006             goto out;
4007         }
4008         dd.flags |= options[j].flag;
4009         g_free(arg);
4010         arg = NULL;
4011     }
4012 
4013     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
4014         error_report("Must specify both input and output files");
4015         ret = -1;
4016         goto out;
4017     }
4018     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false);
4019 
4020     if (!blk1) {
4021         ret = -1;
4022         goto out;
4023     }
4024 
4025     drv = bdrv_find_format(out_fmt);
4026     if (!drv) {
4027         error_report("Unknown file format");
4028         ret = -1;
4029         goto out;
4030     }
4031     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
4032 
4033     if (!proto_drv) {
4034         error_report_err(local_err);
4035         ret = -1;
4036         goto out;
4037     }
4038     if (!drv->create_opts) {
4039         error_report("Format driver '%s' does not support image creation",
4040                      drv->format_name);
4041         ret = -1;
4042         goto out;
4043     }
4044     if (!proto_drv->create_opts) {
4045         error_report("Protocol driver '%s' does not support image creation",
4046                      proto_drv->format_name);
4047         ret = -1;
4048         goto out;
4049     }
4050     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4051     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
4052 
4053     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4054 
4055     size = blk_getlength(blk1);
4056     if (size < 0) {
4057         error_report("Failed to get size for '%s'", in.filename);
4058         ret = -1;
4059         goto out;
4060     }
4061 
4062     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
4063         dd.count * in.bsz < size) {
4064         size = dd.count * in.bsz;
4065     }
4066 
4067     /* Overflow means the specified offset is beyond input image's size */
4068     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4069                               size < in.bsz * in.offset)) {
4070         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
4071     } else {
4072         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
4073                             size - in.bsz * in.offset, &error_abort);
4074     }
4075 
4076     ret = bdrv_create(drv, out.filename, opts, &local_err);
4077     if (ret < 0) {
4078         error_reportf_err(local_err,
4079                           "%s: error while creating output image: ",
4080                           out.filename);
4081         ret = -1;
4082         goto out;
4083     }
4084 
4085     blk2 = img_open(image_opts, out.filename, out_fmt, BDRV_O_RDWR,
4086                     false, false);
4087 
4088     if (!blk2) {
4089         ret = -1;
4090         goto out;
4091     }
4092 
4093     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4094                               size < in.offset * in.bsz)) {
4095         /* We give a warning if the skip option is bigger than the input
4096          * size and create an empty output disk image (i.e. like dd(1)).
4097          */
4098         error_report("%s: cannot skip to specified offset", in.filename);
4099         in_pos = size;
4100     } else {
4101         in_pos = in.offset * in.bsz;
4102     }
4103 
4104     in.buf = g_new(uint8_t, in.bsz);
4105 
4106     for (out_pos = 0; in_pos < size; block_count++) {
4107         int in_ret, out_ret;
4108 
4109         if (in_pos + in.bsz > size) {
4110             in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
4111         } else {
4112             in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
4113         }
4114         if (in_ret < 0) {
4115             error_report("error while reading from input image file: %s",
4116                          strerror(-in_ret));
4117             ret = -1;
4118             goto out;
4119         }
4120         in_pos += in_ret;
4121 
4122         out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
4123 
4124         if (out_ret < 0) {
4125             error_report("error while writing to output image file: %s",
4126                          strerror(-out_ret));
4127             ret = -1;
4128             goto out;
4129         }
4130         out_pos += out_ret;
4131     }
4132 
4133 out:
4134     g_free(arg);
4135     qemu_opts_del(opts);
4136     qemu_opts_free(create_opts);
4137     blk_unref(blk1);
4138     blk_unref(blk2);
4139     g_free(in.filename);
4140     g_free(out.filename);
4141     g_free(in.buf);
4142     g_free(out.buf);
4143 
4144     if (ret) {
4145         return 1;
4146     }
4147     return 0;
4148 }
4149 
4150 
4151 static const img_cmd_t img_cmds[] = {
4152 #define DEF(option, callback, arg_string)        \
4153     { option, callback },
4154 #include "qemu-img-cmds.h"
4155 #undef DEF
4156 #undef GEN_DOCS
4157     { NULL, NULL, },
4158 };
4159 
4160 int main(int argc, char **argv)
4161 {
4162     const img_cmd_t *cmd;
4163     const char *cmdname;
4164     Error *local_error = NULL;
4165     char *trace_file = NULL;
4166     int c;
4167     static const struct option long_options[] = {
4168         {"help", no_argument, 0, 'h'},
4169         {"version", no_argument, 0, 'V'},
4170         {"trace", required_argument, NULL, 'T'},
4171         {0, 0, 0, 0}
4172     };
4173 
4174 #ifdef CONFIG_POSIX
4175     signal(SIGPIPE, SIG_IGN);
4176 #endif
4177 
4178     module_call_init(MODULE_INIT_TRACE);
4179     error_set_progname(argv[0]);
4180     qemu_init_exec_dir(argv[0]);
4181 
4182     if (qemu_init_main_loop(&local_error)) {
4183         error_report_err(local_error);
4184         exit(EXIT_FAILURE);
4185     }
4186 
4187     qcrypto_init(&error_fatal);
4188 
4189     module_call_init(MODULE_INIT_QOM);
4190     bdrv_init();
4191     if (argc < 2) {
4192         error_exit("Not enough arguments");
4193     }
4194 
4195     qemu_add_opts(&qemu_object_opts);
4196     qemu_add_opts(&qemu_source_opts);
4197     qemu_add_opts(&qemu_trace_opts);
4198 
4199     while ((c = getopt_long(argc, argv, "+hVT:", long_options, NULL)) != -1) {
4200         switch (c) {
4201         case 'h':
4202             help();
4203             return 0;
4204         case 'V':
4205             printf(QEMU_IMG_VERSION);
4206             return 0;
4207         case 'T':
4208             g_free(trace_file);
4209             trace_file = trace_opt_parse(optarg);
4210             break;
4211         }
4212     }
4213 
4214     cmdname = argv[optind];
4215 
4216     /* reset getopt_long scanning */
4217     argc -= optind;
4218     if (argc < 1) {
4219         return 0;
4220     }
4221     argv += optind;
4222     optind = 0;
4223 
4224     if (!trace_init_backends()) {
4225         exit(1);
4226     }
4227     trace_init_file(trace_file);
4228     qemu_set_log(LOG_TRACE);
4229 
4230     /* find the command */
4231     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
4232         if (!strcmp(cmdname, cmd->name)) {
4233             return cmd->handler(argc, argv);
4234         }
4235     }
4236 
4237     /* not found */
4238     error_exit("Command not found: %s", cmdname);
4239 }
4240