xref: /openbmc/qemu/qemu-img.c (revision d1972be1)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu-common.h"
29 #include "qemu-version.h"
30 #include "qapi/error.h"
31 #include "qapi/qapi-visit-block-core.h"
32 #include "qapi/qobject-output-visitor.h"
33 #include "qapi/qmp/qjson.h"
34 #include "qapi/qmp/qdict.h"
35 #include "qapi/qmp/qstring.h"
36 #include "qemu/cutils.h"
37 #include "qemu/config-file.h"
38 #include "qemu/option.h"
39 #include "qemu/error-report.h"
40 #include "qemu/log.h"
41 #include "qemu/main-loop.h"
42 #include "qemu/module.h"
43 #include "qemu/units.h"
44 #include "qom/object_interfaces.h"
45 #include "sysemu/block-backend.h"
46 #include "block/block_int.h"
47 #include "block/blockjob.h"
48 #include "block/qapi.h"
49 #include "crypto/init.h"
50 #include "trace/control.h"
51 
/*
 * Banner placed at the start of the help text built in help():
 * "qemu-img version " followed by QEMU_FULL_VERSION, a newline,
 * QEMU_COPYRIGHT and another newline.
 */
#define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
                          "\n" QEMU_COPYRIGHT "\n"
54 
/*
 * Pairs a name with a handler that takes an argc/argv style argument vector
 * and returns an int.  Presumably one entry per qemu-img subcommand; the
 * table that uses this type is outside the part of the file visible here.
 */
typedef struct img_cmd_t {
    const char *name;
    int (*handler)(int argc, char **argv);
} img_cmd_t;
59 
/*
 * Values reported by getopt_long() for options that have no short form.
 * The numbering starts at 256, above every single-character option code,
 * and each following enumerator is one larger than its predecessor.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN,
    OPTION_OBJECT,
    OPTION_IMAGE_OPTS,
    OPTION_PATTERN,
    OPTION_FLUSH_INTERVAL,
    OPTION_NO_DRAIN,
    OPTION_TARGET_IMAGE_OPTS,
    OPTION_SIZE,
    OPTION_PREALLOCATION,
    OPTION_SHRINK,
    OPTION_SALVAGE,
};
74 
/* Output styles that can be selected with --output ("json" or "human"). */
typedef enum OutputFormat {
    OFORMAT_JSON = 0,
    OFORMAT_HUMAN = 1,
} OutputFormat;
79 
/*
 * Cache mode string the subcommands start from before parsing their
 * options.  Default to cache=writeback as data integrity is not important
 * for qemu-img.
 */
#define BDRV_DEFAULT_CACHE "writeback"
82 
/*
 * Iteration callback handed to bdrv_iterate_format() by help(): writes a
 * single space followed by @name to stdout.  @opaque is not used.
 */
static void format_print(void *opaque, const char *name)
{
    fprintf(stdout, " %s", name);
}
87 
88 static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
89 {
90     va_list ap;
91 
92     va_start(ap, fmt);
93     error_vreport(fmt, ap);
94     va_end(ap);
95 
96     error_printf("Try 'qemu-img --help' for more information\n");
97     exit(EXIT_FAILURE);
98 }
99 
/*
 * Report that @option was given without the argument it needs and terminate
 * through error_exit().  Does not return.
 */
static void QEMU_NORETURN missing_argument(const char *option)
{
    error_exit("missing argument for option '%s'", option);
}
104 
/*
 * Report that @option is not a known option and terminate through
 * error_exit().  Does not return.
 */
static void QEMU_NORETURN unrecognized_option(const char *option)
{
    error_exit("unrecognized option '%s'", option);
}
109 
/*
 * Print the complete usage text followed by the list of supported image
 * formats, then exit with EXIT_SUCCESS.  Does not return.
 *
 * Please keep in sync with qemu-img.texi
 */
static void QEMU_NORETURN help(void)
{
    /*
     * The "Command syntax" section is generated: DEF() is defined so that
     * every entry of qemu-img-cmds.h expands to its arg_string as one
     * indented line, and the header is included in the middle of the
     * string literal concatenation.
     */
    const char *help_msg =
           QEMU_IMG_VERSION
           "usage: qemu-img [standard options] command [command options]\n"
           "QEMU disk image utility\n"
           "\n"
           "    '-h', '--help'       display this help and exit\n"
           "    '-V', '--version'    output version information and exit\n"
           "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
           "                         specify tracing options\n"
           "\n"
           "Command syntax:\n"
#define DEF(option, callback, arg_string)        \
           "  " arg_string "\n"
#include "qemu-img-cmds.h"
#undef DEF
           "\n"
           "Command parameters:\n"
           "  'filename' is a disk image filename\n"
           "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
           "    manual page for a description of the object properties. The most common\n"
           "    object type is a 'secret', which is used to supply passwords and/or\n"
           "    encryption keys.\n"
           "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
           "  'cache' is the cache mode used to write the output disk image, the valid\n"
           "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
           "    'directsync' and 'unsafe' (default for convert)\n"
           "  'src_cache' is the cache mode used to read input disk images, the valid\n"
           "    options are the same as for the 'cache' option\n"
           "  'size' is the disk image size in bytes. Optional suffixes\n"
           "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
           "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
           "    supported. 'b' is ignored.\n"
           "  'output_filename' is the destination disk image filename\n"
           "  'output_fmt' is the destination format\n"
           "  'options' is a comma separated list of format specific options in a\n"
           "    name=value format. Use -o ? for an overview of the options supported by the\n"
           "    used format\n"
           "  'snapshot_param' is param used for internal snapshot, format\n"
           "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
           "    '[ID_OR_NAME]'\n"
           "  '-c' indicates that target image must be compressed (qcow format only)\n"
           "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
           "       new backing file match exactly. The image doesn't need a working\n"
           "       backing file before rebasing in this case (useful for renaming the\n"
           "       backing file). For image creation, allow creating without attempting\n"
           "       to open the backing file.\n"
           "  '-h' with or without a command shows this help and lists the supported formats\n"
           "  '-p' show progress of command (only certain commands)\n"
           "  '-q' use Quiet mode - do not print any output (except errors)\n"
           "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
           "       contain only zeros for qemu-img to create a sparse image during\n"
           "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
           "       unallocated or zero sectors, and the destination image will always be\n"
           "       fully allocated\n"
           "  '--output' takes the format in which the output must be done (human or json)\n"
           "  '-n' skips the target volume creation (useful if the volume is created\n"
           "       prior to running qemu-img)\n"
           "\n"
           "Parameters to check subcommand:\n"
           "  '-r' tries to repair any inconsistencies that are found during the check.\n"
           "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
           "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
           "       hiding corruption that has already occurred.\n"
           "\n"
           "Parameters to convert subcommand:\n"
           "  '-m' specifies how many coroutines work in parallel during the convert\n"
           "       process (defaults to 8)\n"
           "  '-W' allow to write to the target out of order rather than sequential\n"
           "\n"
           "Parameters to snapshot subcommand:\n"
           "  'snapshot' is the name of the snapshot to create, apply or delete\n"
           "  '-a' applies a snapshot (revert disk to saved state)\n"
           "  '-c' creates a snapshot\n"
           "  '-d' deletes a snapshot\n"
           "  '-l' lists all snapshots in the given image\n"
           "\n"
           "Parameters to compare subcommand:\n"
           "  '-f' first image format\n"
           "  '-F' second image format\n"
           "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
           "\n"
           "Parameters to dd subcommand:\n"
           "  'bs=BYTES' read and write up to BYTES bytes at a time "
           "(default: 512)\n"
           "  'count=N' copy only N input blocks\n"
           "  'if=FILE' read from FILE\n"
           "  'of=FILE' write to FILE\n"
           "  'skip=N' skip N bs-sized blocks at the start of input\n";

    printf("%s\nSupported formats:", help_msg);
    /* format_print() appends " <name>" for every format it is called with */
    bdrv_iterate_format(format_print, NULL, false);
    printf("\n\n" QEMU_HELP_BOTTOM "\n");
    exit(EXIT_SUCCESS);
}
207 
/*
 * Option list that receives every --object definition parsed by the
 * subcommands; the collected entries are later walked with
 * qemu_opts_foreach() to create the objects.
 *
 * The descriptor table is empty.  "qom-type" is the implied option name,
 * presumably the key used for a leading value written without "key=".
 */
static QemuOptsList qemu_object_opts = {
    .name = "object",
    .implied_opt_name = "qom-type",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
    .desc = {
        { }
    },
};
216 
217 static bool qemu_img_object_print_help(const char *type, QemuOpts *opts)
218 {
219     if (user_creatable_print_help(type, opts)) {
220         exit(0);
221     }
222     return true;
223 }
224 
/*
 * Option list named "source" with an empty descriptor table and "file" as
 * the implied option name.  img_open() looks up a list called "source" via
 * qemu_find_opts() to parse --image-opts strings; presumably that lookup
 * resolves to this list once it has been registered (registration is not
 * visible in this part of the file).
 */
static QemuOptsList qemu_source_opts = {
    .name = "source",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
    .desc = {
        { }
    },
};
233 
234 static int GCC_FMT_ATTR(2, 3) qprintf(bool quiet, const char *fmt, ...)
235 {
236     int ret = 0;
237     if (!quiet) {
238         va_list args;
239         va_start(args, fmt);
240         ret = vprintf(fmt, args);
241         va_end(args);
242     }
243     return ret;
244 }
245 
246 
247 static int print_block_option_help(const char *filename, const char *fmt)
248 {
249     BlockDriver *drv, *proto_drv;
250     QemuOptsList *create_opts = NULL;
251     Error *local_err = NULL;
252 
253     /* Find driver and parse its options */
254     drv = bdrv_find_format(fmt);
255     if (!drv) {
256         error_report("Unknown file format '%s'", fmt);
257         return 1;
258     }
259 
260     if (!drv->create_opts) {
261         error_report("Format driver '%s' does not support image creation", fmt);
262         return 1;
263     }
264 
265     create_opts = qemu_opts_append(create_opts, drv->create_opts);
266     if (filename) {
267         proto_drv = bdrv_find_protocol(filename, true, &local_err);
268         if (!proto_drv) {
269             error_report_err(local_err);
270             qemu_opts_free(create_opts);
271             return 1;
272         }
273         if (!proto_drv->create_opts) {
274             error_report("Protocol driver '%s' does not support image creation",
275                          proto_drv->format_name);
276             qemu_opts_free(create_opts);
277             return 1;
278         }
279         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
280     }
281 
282     if (filename) {
283         printf("Supported options:\n");
284     } else {
285         printf("Supported %s options:\n", fmt);
286     }
287     qemu_opts_print_help(create_opts, false);
288     qemu_opts_free(create_opts);
289 
290     if (!filename) {
291         printf("\n"
292                "The protocol level may support further options.\n"
293                "Specify the target filename to include those options.\n");
294     }
295 
296     return 0;
297 }
298 
299 
300 static BlockBackend *img_open_opts(const char *optstr,
301                                    QemuOpts *opts, int flags, bool writethrough,
302                                    bool quiet, bool force_share)
303 {
304     QDict *options;
305     Error *local_err = NULL;
306     BlockBackend *blk;
307     options = qemu_opts_to_qdict(opts, NULL);
308     if (force_share) {
309         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
310             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
311             error_report("--force-share/-U conflicts with image options");
312             qobject_unref(options);
313             return NULL;
314         }
315         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
316     }
317     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
318     if (!blk) {
319         error_reportf_err(local_err, "Could not open '%s': ", optstr);
320         return NULL;
321     }
322     blk_set_enable_write_cache(blk, !writethrough);
323 
324     return blk;
325 }
326 
327 static BlockBackend *img_open_file(const char *filename,
328                                    QDict *options,
329                                    const char *fmt, int flags,
330                                    bool writethrough, bool quiet,
331                                    bool force_share)
332 {
333     BlockBackend *blk;
334     Error *local_err = NULL;
335 
336     if (!options) {
337         options = qdict_new();
338     }
339     if (fmt) {
340         qdict_put_str(options, "driver", fmt);
341     }
342 
343     if (force_share) {
344         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
345     }
346     blk = blk_new_open(filename, NULL, options, flags, &local_err);
347     if (!blk) {
348         error_reportf_err(local_err, "Could not open '%s': ", filename);
349         return NULL;
350     }
351     blk_set_enable_write_cache(blk, !writethrough);
352 
353     return blk;
354 }
355 
356 
357 static int img_add_key_secrets(void *opaque,
358                                const char *name, const char *value,
359                                Error **errp)
360 {
361     QDict *options = opaque;
362 
363     if (g_str_has_suffix(name, "key-secret")) {
364         qdict_put_str(options, name, value);
365     }
366 
367     return 0;
368 }
369 
370 
371 static BlockBackend *img_open(bool image_opts,
372                               const char *filename,
373                               const char *fmt, int flags, bool writethrough,
374                               bool quiet, bool force_share)
375 {
376     BlockBackend *blk;
377     if (image_opts) {
378         QemuOpts *opts;
379         if (fmt) {
380             error_report("--image-opts and --format are mutually exclusive");
381             return NULL;
382         }
383         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
384                                        filename, true);
385         if (!opts) {
386             return NULL;
387         }
388         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
389                             force_share);
390     } else {
391         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
392                             force_share);
393     }
394     return blk;
395 }
396 
397 
398 static int add_old_style_options(const char *fmt, QemuOpts *opts,
399                                  const char *base_filename,
400                                  const char *base_fmt)
401 {
402     Error *err = NULL;
403 
404     if (base_filename) {
405         qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &err);
406         if (err) {
407             error_report("Backing file not supported for file format '%s'",
408                          fmt);
409             error_free(err);
410             return -1;
411         }
412     }
413     if (base_fmt) {
414         qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &err);
415         if (err) {
416             error_report("Backing file format not supported for file "
417                          "format '%s'", fmt);
418             error_free(err);
419             return -1;
420         }
421     }
422     return 0;
423 }
424 
/*
 * Convert the size string @s with qemu_strtosz().
 *
 * Returns the parsed size, the negative error code from qemu_strtosz() if
 * parsing fails, or -ERANGE if the value does not fit into an int64_t.
 */
static int64_t cvtnum(const char *s)
{
    uint64_t parsed;
    int rc = qemu_strtosz(s, NULL, &parsed);

    if (rc < 0) {
        return rc;
    }

    return parsed > INT64_MAX ? -ERANGE : (int64_t)parsed;
}
439 
440 static int img_create(int argc, char **argv)
441 {
442     int c;
443     uint64_t img_size = -1;
444     const char *fmt = "raw";
445     const char *base_fmt = NULL;
446     const char *filename;
447     const char *base_filename = NULL;
448     char *options = NULL;
449     Error *local_err = NULL;
450     bool quiet = false;
451     int flags = 0;
452 
453     for(;;) {
454         static const struct option long_options[] = {
455             {"help", no_argument, 0, 'h'},
456             {"object", required_argument, 0, OPTION_OBJECT},
457             {0, 0, 0, 0}
458         };
459         c = getopt_long(argc, argv, ":F:b:f:ho:qu",
460                         long_options, NULL);
461         if (c == -1) {
462             break;
463         }
464         switch(c) {
465         case ':':
466             missing_argument(argv[optind - 1]);
467             break;
468         case '?':
469             unrecognized_option(argv[optind - 1]);
470             break;
471         case 'h':
472             help();
473             break;
474         case 'F':
475             base_fmt = optarg;
476             break;
477         case 'b':
478             base_filename = optarg;
479             break;
480         case 'f':
481             fmt = optarg;
482             break;
483         case 'o':
484             if (!is_valid_option_list(optarg)) {
485                 error_report("Invalid option list: %s", optarg);
486                 goto fail;
487             }
488             if (!options) {
489                 options = g_strdup(optarg);
490             } else {
491                 char *old_options = options;
492                 options = g_strdup_printf("%s,%s", options, optarg);
493                 g_free(old_options);
494             }
495             break;
496         case 'q':
497             quiet = true;
498             break;
499         case 'u':
500             flags |= BDRV_O_NO_BACKING;
501             break;
502         case OPTION_OBJECT: {
503             QemuOpts *opts;
504             opts = qemu_opts_parse_noisily(&qemu_object_opts,
505                                            optarg, true);
506             if (!opts) {
507                 goto fail;
508             }
509         }   break;
510         }
511     }
512 
513     /* Get the filename */
514     filename = (optind < argc) ? argv[optind] : NULL;
515     if (options && has_help_option(options)) {
516         g_free(options);
517         return print_block_option_help(filename, fmt);
518     }
519 
520     if (optind >= argc) {
521         error_exit("Expecting image file name");
522     }
523     optind++;
524 
525     if (qemu_opts_foreach(&qemu_object_opts,
526                           user_creatable_add_opts_foreach,
527                           qemu_img_object_print_help, &error_fatal)) {
528         goto fail;
529     }
530 
531     /* Get image size, if specified */
532     if (optind < argc) {
533         int64_t sval;
534 
535         sval = cvtnum(argv[optind++]);
536         if (sval < 0) {
537             if (sval == -ERANGE) {
538                 error_report("Image size must be less than 8 EiB!");
539             } else {
540                 error_report("Invalid image size specified! You may use k, M, "
541                       "G, T, P or E suffixes for ");
542                 error_report("kilobytes, megabytes, gigabytes, terabytes, "
543                              "petabytes and exabytes.");
544             }
545             goto fail;
546         }
547         img_size = (uint64_t)sval;
548     }
549     if (optind != argc) {
550         error_exit("Unexpected argument: %s", argv[optind]);
551     }
552 
553     bdrv_img_create(filename, fmt, base_filename, base_fmt,
554                     options, img_size, flags, quiet, &local_err);
555     if (local_err) {
556         error_reportf_err(local_err, "%s: ", filename);
557         goto fail;
558     }
559 
560     g_free(options);
561     return 0;
562 
563 fail:
564     g_free(options);
565     return 1;
566 }
567 
568 static void dump_json_image_check(ImageCheck *check, bool quiet)
569 {
570     QString *str;
571     QObject *obj;
572     Visitor *v = qobject_output_visitor_new(&obj);
573 
574     visit_type_ImageCheck(v, NULL, &check, &error_abort);
575     visit_complete(v, &obj);
576     str = qobject_to_json_pretty(obj);
577     assert(str != NULL);
578     qprintf(quiet, "%s\n", qstring_get_str(str));
579     qobject_unref(obj);
580     visit_free(v);
581     qobject_unref(str);
582 }
583 
584 static void dump_human_image_check(ImageCheck *check, bool quiet)
585 {
586     if (!(check->corruptions || check->leaks || check->check_errors)) {
587         qprintf(quiet, "No errors were found on the image.\n");
588     } else {
589         if (check->corruptions) {
590             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
591                     "Data may be corrupted, or further writes to the image "
592                     "may corrupt it.\n",
593                     check->corruptions);
594         }
595 
596         if (check->leaks) {
597             qprintf(quiet,
598                     "\n%" PRId64 " leaked clusters were found on the image.\n"
599                     "This means waste of disk space, but no harm to data.\n",
600                     check->leaks);
601         }
602 
603         if (check->check_errors) {
604             qprintf(quiet,
605                     "\n%" PRId64
606                     " internal errors have occurred during the check.\n",
607                     check->check_errors);
608         }
609     }
610 
611     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
612         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
613                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
614                 check->allocated_clusters, check->total_clusters,
615                 check->allocated_clusters * 100.0 / check->total_clusters,
616                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
617                 check->compressed_clusters * 100.0 /
618                 check->allocated_clusters);
619     }
620 
621     if (check->image_end_offset) {
622         qprintf(quiet,
623                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
624     }
625 }
626 
627 static int collect_image_check(BlockDriverState *bs,
628                    ImageCheck *check,
629                    const char *filename,
630                    const char *fmt,
631                    int fix)
632 {
633     int ret;
634     BdrvCheckResult result;
635 
636     ret = bdrv_check(bs, &result, fix);
637     if (ret < 0) {
638         return ret;
639     }
640 
641     check->filename                 = g_strdup(filename);
642     check->format                   = g_strdup(bdrv_get_format_name(bs));
643     check->check_errors             = result.check_errors;
644     check->corruptions              = result.corruptions;
645     check->has_corruptions          = result.corruptions != 0;
646     check->leaks                    = result.leaks;
647     check->has_leaks                = result.leaks != 0;
648     check->corruptions_fixed        = result.corruptions_fixed;
649     check->has_corruptions_fixed    = result.corruptions != 0;
650     check->leaks_fixed              = result.leaks_fixed;
651     check->has_leaks_fixed          = result.leaks != 0;
652     check->image_end_offset         = result.image_end_offset;
653     check->has_image_end_offset     = result.image_end_offset != 0;
654     check->total_clusters           = result.bfi.total_clusters;
655     check->has_total_clusters       = result.bfi.total_clusters != 0;
656     check->allocated_clusters       = result.bfi.allocated_clusters;
657     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
658     check->fragmented_clusters      = result.bfi.fragmented_clusters;
659     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
660     check->compressed_clusters      = result.bfi.compressed_clusters;
661     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
662 
663     return 0;
664 }
665 
666 /*
667  * Checks an image for consistency. Exit codes:
668  *
669  *  0 - Check completed, image is good
670  *  1 - Check not completed because of internal errors
671  *  2 - Check completed, image is corrupted
672  *  3 - Check completed, image has leaked clusters, but is good otherwise
673  * 63 - Checks are not supported by the image format
674  */
675 static int img_check(int argc, char **argv)
676 {
677     int c, ret;
678     OutputFormat output_format = OFORMAT_HUMAN;
679     const char *filename, *fmt, *output, *cache;
680     BlockBackend *blk;
681     BlockDriverState *bs;
682     int fix = 0;
683     int flags = BDRV_O_CHECK;
684     bool writethrough;
685     ImageCheck *check;
686     bool quiet = false;
687     bool image_opts = false;
688     bool force_share = false;
689 
690     fmt = NULL;
691     output = NULL;
692     cache = BDRV_DEFAULT_CACHE;
693 
694     for(;;) {
695         int option_index = 0;
696         static const struct option long_options[] = {
697             {"help", no_argument, 0, 'h'},
698             {"format", required_argument, 0, 'f'},
699             {"repair", required_argument, 0, 'r'},
700             {"output", required_argument, 0, OPTION_OUTPUT},
701             {"object", required_argument, 0, OPTION_OBJECT},
702             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
703             {"force-share", no_argument, 0, 'U'},
704             {0, 0, 0, 0}
705         };
706         c = getopt_long(argc, argv, ":hf:r:T:qU",
707                         long_options, &option_index);
708         if (c == -1) {
709             break;
710         }
711         switch(c) {
712         case ':':
713             missing_argument(argv[optind - 1]);
714             break;
715         case '?':
716             unrecognized_option(argv[optind - 1]);
717             break;
718         case 'h':
719             help();
720             break;
721         case 'f':
722             fmt = optarg;
723             break;
724         case 'r':
725             flags |= BDRV_O_RDWR;
726 
727             if (!strcmp(optarg, "leaks")) {
728                 fix = BDRV_FIX_LEAKS;
729             } else if (!strcmp(optarg, "all")) {
730                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
731             } else {
732                 error_exit("Unknown option value for -r "
733                            "(expecting 'leaks' or 'all'): %s", optarg);
734             }
735             break;
736         case OPTION_OUTPUT:
737             output = optarg;
738             break;
739         case 'T':
740             cache = optarg;
741             break;
742         case 'q':
743             quiet = true;
744             break;
745         case 'U':
746             force_share = true;
747             break;
748         case OPTION_OBJECT: {
749             QemuOpts *opts;
750             opts = qemu_opts_parse_noisily(&qemu_object_opts,
751                                            optarg, true);
752             if (!opts) {
753                 return 1;
754             }
755         }   break;
756         case OPTION_IMAGE_OPTS:
757             image_opts = true;
758             break;
759         }
760     }
761     if (optind != argc - 1) {
762         error_exit("Expecting one image file name");
763     }
764     filename = argv[optind++];
765 
766     if (output && !strcmp(output, "json")) {
767         output_format = OFORMAT_JSON;
768     } else if (output && !strcmp(output, "human")) {
769         output_format = OFORMAT_HUMAN;
770     } else if (output) {
771         error_report("--output must be used with human or json as argument.");
772         return 1;
773     }
774 
775     if (qemu_opts_foreach(&qemu_object_opts,
776                           user_creatable_add_opts_foreach,
777                           qemu_img_object_print_help, &error_fatal)) {
778         return 1;
779     }
780 
781     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
782     if (ret < 0) {
783         error_report("Invalid source cache option: %s", cache);
784         return 1;
785     }
786 
787     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
788                    force_share);
789     if (!blk) {
790         return 1;
791     }
792     bs = blk_bs(blk);
793 
794     check = g_new0(ImageCheck, 1);
795     ret = collect_image_check(bs, check, filename, fmt, fix);
796 
797     if (ret == -ENOTSUP) {
798         error_report("This image format does not support checks");
799         ret = 63;
800         goto fail;
801     }
802 
803     if (check->corruptions_fixed || check->leaks_fixed) {
804         int corruptions_fixed, leaks_fixed;
805 
806         leaks_fixed         = check->leaks_fixed;
807         corruptions_fixed   = check->corruptions_fixed;
808 
809         if (output_format == OFORMAT_HUMAN) {
810             qprintf(quiet,
811                     "The following inconsistencies were found and repaired:\n\n"
812                     "    %" PRId64 " leaked clusters\n"
813                     "    %" PRId64 " corruptions\n\n"
814                     "Double checking the fixed image now...\n",
815                     check->leaks_fixed,
816                     check->corruptions_fixed);
817         }
818 
819         ret = collect_image_check(bs, check, filename, fmt, 0);
820 
821         check->leaks_fixed          = leaks_fixed;
822         check->corruptions_fixed    = corruptions_fixed;
823     }
824 
825     if (!ret) {
826         switch (output_format) {
827         case OFORMAT_HUMAN:
828             dump_human_image_check(check, quiet);
829             break;
830         case OFORMAT_JSON:
831             dump_json_image_check(check, quiet);
832             break;
833         }
834     }
835 
836     if (ret || check->check_errors) {
837         if (ret) {
838             error_report("Check failed: %s", strerror(-ret));
839         } else {
840             error_report("Check failed");
841         }
842         ret = 1;
843         goto fail;
844     }
845 
846     if (check->corruptions) {
847         ret = 2;
848     } else if (check->leaks) {
849         ret = 3;
850     } else {
851         ret = 0;
852     }
853 
854 fail:
855     qapi_free_ImageCheck(check);
856     blk_unref(blk);
857     return ret;
858 }
859 
/*
 * Opaque data for common_block_job_cb(): @errp is where that callback stores
 * an error when the job result is negative.  @bs is not read by the callback
 * itself; presumably it names the node the job works on -- confirm against
 * the code that fills this structure in.
 */
typedef struct CommonBlockJobCBInfo {
    BlockDriverState *bs;
    Error **errp;
} CommonBlockJobCBInfo;
864 
865 static void common_block_job_cb(void *opaque, int ret)
866 {
867     CommonBlockJobCBInfo *cbi = opaque;
868 
869     if (ret < 0) {
870         error_setg_errno(cbi->errp, -ret, "Block job failed");
871     }
872 }
873 
874 static void run_block_job(BlockJob *job, Error **errp)
875 {
876     AioContext *aio_context = blk_get_aio_context(job->blk);
877     int ret = 0;
878 
879     aio_context_acquire(aio_context);
880     job_ref(&job->job);
881     do {
882         float progress = 0.0f;
883         aio_poll(aio_context, true);
884         if (job->job.progress_total) {
885             progress = (float)job->job.progress_current /
886                        job->job.progress_total * 100.f;
887         }
888         qemu_progress_print(progress, 0);
889     } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
890 
891     if (!job_is_completed(&job->job)) {
892         ret = job_complete_sync(&job->job, errp);
893     } else {
894         ret = job->job.ret;
895     }
896     job_unref(&job->job);
897     aio_context_release(aio_context);
898 
899     /* publish completion progress only when success */
900     if (!ret) {
901         qemu_progress_print(100.f, 0);
902     }
903 }
904 
905 static int img_commit(int argc, char **argv)
906 {
907     int c, ret, flags;
908     const char *filename, *fmt, *cache, *base;
909     BlockBackend *blk;
910     BlockDriverState *bs, *base_bs;
911     BlockJob *job;
912     bool progress = false, quiet = false, drop = false;
913     bool writethrough;
914     Error *local_err = NULL;
915     CommonBlockJobCBInfo cbi;
916     bool image_opts = false;
917     AioContext *aio_context;
918 
919     fmt = NULL;
920     cache = BDRV_DEFAULT_CACHE;
921     base = NULL;
922     for(;;) {
923         static const struct option long_options[] = {
924             {"help", no_argument, 0, 'h'},
925             {"object", required_argument, 0, OPTION_OBJECT},
926             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
927             {0, 0, 0, 0}
928         };
929         c = getopt_long(argc, argv, ":f:ht:b:dpq",
930                         long_options, NULL);
931         if (c == -1) {
932             break;
933         }
934         switch(c) {
935         case ':':
936             missing_argument(argv[optind - 1]);
937             break;
938         case '?':
939             unrecognized_option(argv[optind - 1]);
940             break;
941         case 'h':
942             help();
943             break;
944         case 'f':
945             fmt = optarg;
946             break;
947         case 't':
948             cache = optarg;
949             break;
950         case 'b':
951             base = optarg;
952             /* -b implies -d */
953             drop = true;
954             break;
955         case 'd':
956             drop = true;
957             break;
958         case 'p':
959             progress = true;
960             break;
961         case 'q':
962             quiet = true;
963             break;
964         case OPTION_OBJECT: {
965             QemuOpts *opts;
966             opts = qemu_opts_parse_noisily(&qemu_object_opts,
967                                            optarg, true);
968             if (!opts) {
969                 return 1;
970             }
971         }   break;
972         case OPTION_IMAGE_OPTS:
973             image_opts = true;
974             break;
975         }
976     }
977 
978     /* Progress is not shown in Quiet mode */
979     if (quiet) {
980         progress = false;
981     }
982 
983     if (optind != argc - 1) {
984         error_exit("Expecting one image file name");
985     }
986     filename = argv[optind++];
987 
988     if (qemu_opts_foreach(&qemu_object_opts,
989                           user_creatable_add_opts_foreach,
990                           qemu_img_object_print_help, &error_fatal)) {
991         return 1;
992     }
993 
994     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
995     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
996     if (ret < 0) {
997         error_report("Invalid cache option: %s", cache);
998         return 1;
999     }
1000 
1001     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1002                    false);
1003     if (!blk) {
1004         return 1;
1005     }
1006     bs = blk_bs(blk);
1007 
1008     qemu_progress_init(progress, 1.f);
1009     qemu_progress_print(0.f, 100);
1010 
1011     if (base) {
1012         base_bs = bdrv_find_backing_image(bs, base);
1013         if (!base_bs) {
1014             error_setg(&local_err,
1015                        "Did not find '%s' in the backing chain of '%s'",
1016                        base, filename);
1017             goto done;
1018         }
1019     } else {
1020         /* This is different from QMP, which by default uses the deepest file in
1021          * the backing chain (i.e., the very base); however, the traditional
1022          * behavior of qemu-img commit is using the immediate backing file. */
1023         base_bs = backing_bs(bs);
1024         if (!base_bs) {
1025             error_setg(&local_err, "Image does not have a backing file");
1026             goto done;
1027         }
1028     }
1029 
1030     cbi = (CommonBlockJobCBInfo){
1031         .errp = &local_err,
1032         .bs   = bs,
1033     };
1034 
1035     aio_context = bdrv_get_aio_context(bs);
1036     aio_context_acquire(aio_context);
1037     commit_active_start("commit", bs, base_bs, JOB_DEFAULT, 0,
1038                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1039                         &cbi, false, &local_err);
1040     aio_context_release(aio_context);
1041     if (local_err) {
1042         goto done;
1043     }
1044 
1045     /* When the block job completes, the BlockBackend reference will point to
1046      * the old backing file. In order to avoid that the top image is already
1047      * deleted, so we can still empty it afterwards, increment the reference
1048      * counter here preemptively. */
1049     if (!drop) {
1050         bdrv_ref(bs);
1051     }
1052 
1053     job = block_job_get("commit");
1054     assert(job);
1055     run_block_job(job, &local_err);
1056     if (local_err) {
1057         goto unref_backing;
1058     }
1059 
1060     if (!drop && bs->drv->bdrv_make_empty) {
1061         ret = bs->drv->bdrv_make_empty(bs);
1062         if (ret) {
1063             error_setg_errno(&local_err, -ret, "Could not empty %s",
1064                              filename);
1065             goto unref_backing;
1066         }
1067     }
1068 
1069 unref_backing:
1070     if (!drop) {
1071         bdrv_unref(bs);
1072     }
1073 
1074 done:
1075     qemu_progress_end();
1076 
1077     blk_unref(blk);
1078 
1079     if (local_err) {
1080         error_report_err(local_err);
1081         return 1;
1082     }
1083 
1084     qprintf(quiet, "Image committed.\n");
1085     return 0;
1086 }
1087 
1088 /*
1089  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1090  * of the first sector boundary within buf where the sector contains a
1091  * non-zero byte.  This function is robust to a buffer that is not
1092  * sector-aligned.
1093  */
1094 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1095 {
1096     int64_t i;
1097     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1098 
1099     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1100         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1101             return i;
1102         }
1103     }
1104     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1105         return i;
1106     }
1107     return -1;
1108 }
1109 
1110 /*
1111  * Returns true iff the first sector pointed to by 'buf' contains at least
1112  * a non-NUL byte.
1113  *
1114  * 'pnum' is set to the number of sectors (including and immediately following
1115  * the first one) that are known to be in the same allocated/unallocated state.
1116  * The function will try to align the end offset to alignment boundaries so
1117  * that the request will at least end aligned and consequtive requests will
1118  * also start at an aligned offset.
1119  */
1120 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1121                                 int64_t sector_num, int alignment)
1122 {
1123     bool is_zero;
1124     int i, tail;
1125 
1126     if (n <= 0) {
1127         *pnum = 0;
1128         return 0;
1129     }
1130     is_zero = buffer_is_zero(buf, 512);
1131     for(i = 1; i < n; i++) {
1132         buf += 512;
1133         if (is_zero != buffer_is_zero(buf, 512)) {
1134             break;
1135         }
1136     }
1137 
1138     tail = (sector_num + i) & (alignment - 1);
1139     if (tail) {
1140         if (is_zero && i <= tail) {
1141             /* treat unallocated areas which only consist
1142              * of a small tail as allocated. */
1143             is_zero = false;
1144         }
1145         if (!is_zero) {
1146             /* align up end offset of allocated areas. */
1147             i += alignment - tail;
1148             i = MIN(i, n);
1149         } else {
1150             /* align down end offset of zero areas. */
1151             i -= tail;
1152         }
1153     }
1154     *pnum = i;
1155     return !is_zero;
1156 }
1157 
1158 /*
1159  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1160  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1161  * breaking up write requests for only small sparse areas.
1162  */
1163 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1164     int min, int64_t sector_num, int alignment)
1165 {
1166     int ret;
1167     int num_checked, num_used;
1168 
1169     if (n < min) {
1170         min = n;
1171     }
1172 
1173     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1174     if (!ret) {
1175         return ret;
1176     }
1177 
1178     num_used = *pnum;
1179     buf += BDRV_SECTOR_SIZE * *pnum;
1180     n -= *pnum;
1181     sector_num += *pnum;
1182     num_checked = num_used;
1183 
1184     while (n > 0) {
1185         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1186 
1187         buf += BDRV_SECTOR_SIZE * *pnum;
1188         n -= *pnum;
1189         sector_num += *pnum;
1190         num_checked += *pnum;
1191         if (ret) {
1192             num_used = num_checked;
1193         } else if (*pnum >= min) {
1194             break;
1195         }
1196     }
1197 
1198     *pnum = num_used;
1199     return 1;
1200 }
1201 
1202 /*
1203  * Compares two buffers sector by sector. Returns 0 if the first
1204  * sector of each buffer matches, non-zero otherwise.
1205  *
1206  * pnum is set to the sector-aligned size of the buffer prefix that
1207  * has the same matching status as the first sector.
1208  */
1209 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1210                            int64_t bytes, int64_t *pnum)
1211 {
1212     bool res;
1213     int64_t i = MIN(bytes, BDRV_SECTOR_SIZE);
1214 
1215     assert(bytes > 0);
1216 
1217     res = !!memcmp(buf1, buf2, i);
1218     while (i < bytes) {
1219         int64_t len = MIN(bytes - i, BDRV_SECTOR_SIZE);
1220 
1221         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1222             break;
1223         }
1224         i += len;
1225     }
1226 
1227     *pnum = i;
1228     return res;
1229 }
1230 
/*
 * Size in bytes of the I/O buffers allocated by img_compare(), which also
 * caps each of its reads at this value.
 */
#define IO_BUF_SIZE (2 * MiB)
1232 
1233 /*
1234  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1235  *
1236  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1237  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1238  * failure), and 4 on error (the exit status for read errors), after emitting
1239  * an error message.
1240  *
1241  * @param blk:  BlockBackend for the image
1242  * @param offset: Starting offset to check
1243  * @param bytes: Number of bytes to check
1244  * @param filename: Name of disk file we are checking (logging purpose)
1245  * @param buffer: Allocated buffer for storing read data
1246  * @param quiet: Flag for quiet mode
1247  */
1248 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1249                                int64_t bytes, const char *filename,
1250                                uint8_t *buffer, bool quiet)
1251 {
1252     int ret = 0;
1253     int64_t idx;
1254 
1255     ret = blk_pread(blk, offset, buffer, bytes);
1256     if (ret < 0) {
1257         error_report("Error while reading offset %" PRId64 " of %s: %s",
1258                      offset, filename, strerror(-ret));
1259         return 4;
1260     }
1261     idx = find_nonzero(buffer, bytes);
1262     if (idx >= 0) {
1263         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1264                 offset + idx);
1265         return 1;
1266     }
1267 
1268     return 0;
1269 }
1270 
1271 /*
1272  * Compares two images. Exit codes:
1273  *
1274  * 0 - Images are identical
1275  * 1 - Images differ
1276  * >1 - Error occurred
1277  */
1278 static int img_compare(int argc, char **argv)
1279 {
1280     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1281     BlockBackend *blk1, *blk2;
1282     BlockDriverState *bs1, *bs2;
1283     int64_t total_size1, total_size2;
1284     uint8_t *buf1 = NULL, *buf2 = NULL;
1285     int64_t pnum1, pnum2;
1286     int allocated1, allocated2;
1287     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1288     bool progress = false, quiet = false, strict = false;
1289     int flags;
1290     bool writethrough;
1291     int64_t total_size;
1292     int64_t offset = 0;
1293     int64_t chunk;
1294     int c;
1295     uint64_t progress_base;
1296     bool image_opts = false;
1297     bool force_share = false;
1298 
1299     cache = BDRV_DEFAULT_CACHE;
1300     for (;;) {
1301         static const struct option long_options[] = {
1302             {"help", no_argument, 0, 'h'},
1303             {"object", required_argument, 0, OPTION_OBJECT},
1304             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1305             {"force-share", no_argument, 0, 'U'},
1306             {0, 0, 0, 0}
1307         };
1308         c = getopt_long(argc, argv, ":hf:F:T:pqsU",
1309                         long_options, NULL);
1310         if (c == -1) {
1311             break;
1312         }
1313         switch (c) {
1314         case ':':
1315             missing_argument(argv[optind - 1]);
1316             break;
1317         case '?':
1318             unrecognized_option(argv[optind - 1]);
1319             break;
1320         case 'h':
1321             help();
1322             break;
1323         case 'f':
1324             fmt1 = optarg;
1325             break;
1326         case 'F':
1327             fmt2 = optarg;
1328             break;
1329         case 'T':
1330             cache = optarg;
1331             break;
1332         case 'p':
1333             progress = true;
1334             break;
1335         case 'q':
1336             quiet = true;
1337             break;
1338         case 's':
1339             strict = true;
1340             break;
1341         case 'U':
1342             force_share = true;
1343             break;
1344         case OPTION_OBJECT: {
1345             QemuOpts *opts;
1346             opts = qemu_opts_parse_noisily(&qemu_object_opts,
1347                                            optarg, true);
1348             if (!opts) {
1349                 ret = 2;
1350                 goto out4;
1351             }
1352         }   break;
1353         case OPTION_IMAGE_OPTS:
1354             image_opts = true;
1355             break;
1356         }
1357     }
1358 
1359     /* Progress is not shown in Quiet mode */
1360     if (quiet) {
1361         progress = false;
1362     }
1363 
1364 
1365     if (optind != argc - 2) {
1366         error_exit("Expecting two image file names");
1367     }
1368     filename1 = argv[optind++];
1369     filename2 = argv[optind++];
1370 
1371     if (qemu_opts_foreach(&qemu_object_opts,
1372                           user_creatable_add_opts_foreach,
1373                           qemu_img_object_print_help, &error_fatal)) {
1374         ret = 2;
1375         goto out4;
1376     }
1377 
1378     /* Initialize before goto out */
1379     qemu_progress_init(progress, 2.0);
1380 
1381     flags = 0;
1382     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1383     if (ret < 0) {
1384         error_report("Invalid source cache option: %s", cache);
1385         ret = 2;
1386         goto out3;
1387     }
1388 
1389     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
1390                     force_share);
1391     if (!blk1) {
1392         ret = 2;
1393         goto out3;
1394     }
1395 
1396     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
1397                     force_share);
1398     if (!blk2) {
1399         ret = 2;
1400         goto out2;
1401     }
1402     bs1 = blk_bs(blk1);
1403     bs2 = blk_bs(blk2);
1404 
1405     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1406     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1407     total_size1 = blk_getlength(blk1);
1408     if (total_size1 < 0) {
1409         error_report("Can't get size of %s: %s",
1410                      filename1, strerror(-total_size1));
1411         ret = 4;
1412         goto out;
1413     }
1414     total_size2 = blk_getlength(blk2);
1415     if (total_size2 < 0) {
1416         error_report("Can't get size of %s: %s",
1417                      filename2, strerror(-total_size2));
1418         ret = 4;
1419         goto out;
1420     }
1421     total_size = MIN(total_size1, total_size2);
1422     progress_base = MAX(total_size1, total_size2);
1423 
1424     qemu_progress_print(0, 100);
1425 
1426     if (strict && total_size1 != total_size2) {
1427         ret = 1;
1428         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1429         goto out;
1430     }
1431 
1432     while (offset < total_size) {
1433         int status1, status2;
1434 
1435         status1 = bdrv_block_status_above(bs1, NULL, offset,
1436                                           total_size1 - offset, &pnum1, NULL,
1437                                           NULL);
1438         if (status1 < 0) {
1439             ret = 3;
1440             error_report("Sector allocation test failed for %s", filename1);
1441             goto out;
1442         }
1443         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1444 
1445         status2 = bdrv_block_status_above(bs2, NULL, offset,
1446                                           total_size2 - offset, &pnum2, NULL,
1447                                           NULL);
1448         if (status2 < 0) {
1449             ret = 3;
1450             error_report("Sector allocation test failed for %s", filename2);
1451             goto out;
1452         }
1453         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1454 
1455         assert(pnum1 && pnum2);
1456         chunk = MIN(pnum1, pnum2);
1457 
1458         if (strict) {
1459             if (status1 != status2) {
1460                 ret = 1;
1461                 qprintf(quiet, "Strict mode: Offset %" PRId64
1462                         " block status mismatch!\n", offset);
1463                 goto out;
1464             }
1465         }
1466         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1467             /* nothing to do */
1468         } else if (allocated1 == allocated2) {
1469             if (allocated1) {
1470                 int64_t pnum;
1471 
1472                 chunk = MIN(chunk, IO_BUF_SIZE);
1473                 ret = blk_pread(blk1, offset, buf1, chunk);
1474                 if (ret < 0) {
1475                     error_report("Error while reading offset %" PRId64
1476                                  " of %s: %s",
1477                                  offset, filename1, strerror(-ret));
1478                     ret = 4;
1479                     goto out;
1480                 }
1481                 ret = blk_pread(blk2, offset, buf2, chunk);
1482                 if (ret < 0) {
1483                     error_report("Error while reading offset %" PRId64
1484                                  " of %s: %s",
1485                                  offset, filename2, strerror(-ret));
1486                     ret = 4;
1487                     goto out;
1488                 }
1489                 ret = compare_buffers(buf1, buf2, chunk, &pnum);
1490                 if (ret || pnum != chunk) {
1491                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1492                             offset + (ret ? 0 : pnum));
1493                     ret = 1;
1494                     goto out;
1495                 }
1496             }
1497         } else {
1498             chunk = MIN(chunk, IO_BUF_SIZE);
1499             if (allocated1) {
1500                 ret = check_empty_sectors(blk1, offset, chunk,
1501                                           filename1, buf1, quiet);
1502             } else {
1503                 ret = check_empty_sectors(blk2, offset, chunk,
1504                                           filename2, buf1, quiet);
1505             }
1506             if (ret) {
1507                 goto out;
1508             }
1509         }
1510         offset += chunk;
1511         qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1512     }
1513 
1514     if (total_size1 != total_size2) {
1515         BlockBackend *blk_over;
1516         const char *filename_over;
1517 
1518         qprintf(quiet, "Warning: Image size mismatch!\n");
1519         if (total_size1 > total_size2) {
1520             blk_over = blk1;
1521             filename_over = filename1;
1522         } else {
1523             blk_over = blk2;
1524             filename_over = filename2;
1525         }
1526 
1527         while (offset < progress_base) {
1528             ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
1529                                           progress_base - offset, &chunk,
1530                                           NULL, NULL);
1531             if (ret < 0) {
1532                 ret = 3;
1533                 error_report("Sector allocation test failed for %s",
1534                              filename_over);
1535                 goto out;
1536 
1537             }
1538             if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
1539                 chunk = MIN(chunk, IO_BUF_SIZE);
1540                 ret = check_empty_sectors(blk_over, offset, chunk,
1541                                           filename_over, buf1, quiet);
1542                 if (ret) {
1543                     goto out;
1544                 }
1545             }
1546             offset += chunk;
1547             qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1548         }
1549     }
1550 
1551     qprintf(quiet, "Images are identical.\n");
1552     ret = 0;
1553 
1554 out:
1555     qemu_vfree(buf1);
1556     qemu_vfree(buf2);
1557     blk_unref(blk2);
1558 out2:
1559     blk_unref(blk1);
1560 out3:
1561     qemu_progress_end();
1562 out4:
1563     return ret;
1564 }
1565 
/*
 * How 'qemu-img convert' treats a range of the source, as determined by
 * convert_iteration_sectors() and acted upon by convert_co_write().
 */
enum ImgConvertBlockStatus {
    /* Read from the source; written unless it is found to be zeroes */
    BLK_DATA,
    /* Reads as zeroes; zeroes are written unless has_zero_init is set */
    BLK_ZERO,
    /* Left untouched in the target so that its backing file shows through */
    BLK_BACKING_FILE,
};
1571 
1572 #define MAX_COROUTINES 16
1573 
1574 typedef struct ImgConvertState {
1575     BlockBackend **src;
1576     int64_t *src_sectors;
1577     int src_num;
1578     int64_t total_sectors;
1579     int64_t allocated_sectors;
1580     int64_t allocated_done;
1581     int64_t sector_num;
1582     int64_t wr_offs;
1583     enum ImgConvertBlockStatus status;
1584     int64_t sector_next_status;
1585     BlockBackend *target;
1586     bool has_zero_init;
1587     bool compressed;
1588     bool unallocated_blocks_are_zero;
1589     bool target_is_new;
1590     bool target_has_backing;
1591     int64_t target_backing_sectors; /* negative if unknown */
1592     bool wr_in_order;
1593     bool copy_range;
1594     bool salvage;
1595     bool quiet;
1596     int min_sparse;
1597     int alignment;
1598     size_t cluster_sectors;
1599     size_t buf_sectors;
1600     long num_coroutines;
1601     int running_coroutines;
1602     Coroutine *co[MAX_COROUTINES];
1603     int64_t wait_sector_num[MAX_COROUTINES];
1604     CoMutex lock;
1605     int ret;
1606 } ImgConvertState;
1607 
1608 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1609                                 int *src_cur, int64_t *src_cur_offset)
1610 {
1611     *src_cur = 0;
1612     *src_cur_offset = 0;
1613     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1614         *src_cur_offset += s->src_sectors[*src_cur];
1615         (*src_cur)++;
1616         assert(*src_cur < s->src_num);
1617     }
1618 }
1619 
1620 static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1621 {
1622     int64_t src_cur_offset;
1623     int ret, n, src_cur;
1624     bool post_backing_zero = false;
1625 
1626     convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1627 
1628     assert(s->total_sectors > sector_num);
1629     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1630 
1631     if (s->target_backing_sectors >= 0) {
1632         if (sector_num >= s->target_backing_sectors) {
1633             post_backing_zero = s->unallocated_blocks_are_zero;
1634         } else if (sector_num + n > s->target_backing_sectors) {
1635             /* Split requests around target_backing_sectors (because
1636              * starting from there, zeros are handled differently) */
1637             n = s->target_backing_sectors - sector_num;
1638         }
1639     }
1640 
1641     if (s->sector_next_status <= sector_num) {
1642         uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
1643         int64_t count;
1644 
1645         do {
1646             count = n * BDRV_SECTOR_SIZE;
1647 
1648             if (s->target_has_backing) {
1649                 ret = bdrv_block_status(blk_bs(s->src[src_cur]), offset,
1650                                         count, &count, NULL, NULL);
1651             } else {
1652                 ret = bdrv_block_status_above(blk_bs(s->src[src_cur]), NULL,
1653                                               offset, count, &count, NULL,
1654                                               NULL);
1655             }
1656 
1657             if (ret < 0) {
1658                 if (s->salvage) {
1659                     if (n == 1) {
1660                         if (!s->quiet) {
1661                             warn_report("error while reading block status at "
1662                                         "offset %" PRIu64 ": %s", offset,
1663                                         strerror(-ret));
1664                         }
1665                         /* Just try to read the data, then */
1666                         ret = BDRV_BLOCK_DATA;
1667                         count = BDRV_SECTOR_SIZE;
1668                     } else {
1669                         /* Retry on a shorter range */
1670                         n = DIV_ROUND_UP(n, 4);
1671                     }
1672                 } else {
1673                     error_report("error while reading block status at offset "
1674                                  "%" PRIu64 ": %s", offset, strerror(-ret));
1675                     return ret;
1676                 }
1677             }
1678         } while (ret < 0);
1679 
1680         n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
1681 
1682         if (ret & BDRV_BLOCK_ZERO) {
1683             s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
1684         } else if (ret & BDRV_BLOCK_DATA) {
1685             s->status = BLK_DATA;
1686         } else {
1687             s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
1688         }
1689 
1690         s->sector_next_status = sector_num + n;
1691     }
1692 
1693     n = MIN(n, s->sector_next_status - sector_num);
1694     if (s->status == BLK_DATA) {
1695         n = MIN(n, s->buf_sectors);
1696     }
1697 
1698     /* We need to write complete clusters for compressed images, so if an
1699      * unallocated area is shorter than that, we must consider the whole
1700      * cluster allocated. */
1701     if (s->compressed) {
1702         if (n < s->cluster_sectors) {
1703             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1704             s->status = BLK_DATA;
1705         } else {
1706             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1707         }
1708     }
1709 
1710     return n;
1711 }
1712 
1713 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1714                                         int nb_sectors, uint8_t *buf)
1715 {
1716     uint64_t single_read_until = 0;
1717     int n, ret;
1718 
1719     assert(nb_sectors <= s->buf_sectors);
1720     while (nb_sectors > 0) {
1721         BlockBackend *blk;
1722         int src_cur;
1723         int64_t bs_sectors, src_cur_offset;
1724         uint64_t offset;
1725 
1726         /* In the case of compression with multiple source files, we can get a
1727          * nb_sectors that spreads into the next part. So we must be able to
1728          * read across multiple BDSes for one convert_read() call. */
1729         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1730         blk = s->src[src_cur];
1731         bs_sectors = s->src_sectors[src_cur];
1732 
1733         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1734 
1735         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1736         if (single_read_until > offset) {
1737             n = 1;
1738         }
1739 
1740         ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1741         if (ret < 0) {
1742             if (s->salvage) {
1743                 if (n > 1) {
1744                     single_read_until = offset + (n << BDRV_SECTOR_BITS);
1745                     continue;
1746                 } else {
1747                     if (!s->quiet) {
1748                         warn_report("error while reading offset %" PRIu64
1749                                     ": %s", offset, strerror(-ret));
1750                     }
1751                     memset(buf, 0, BDRV_SECTOR_SIZE);
1752                 }
1753             } else {
1754                 return ret;
1755             }
1756         }
1757 
1758         sector_num += n;
1759         nb_sectors -= n;
1760         buf += n * BDRV_SECTOR_SIZE;
1761     }
1762 
1763     return 0;
1764 }
1765 
1766 
1767 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1768                                          int nb_sectors, uint8_t *buf,
1769                                          enum ImgConvertBlockStatus status)
1770 {
1771     int ret;
1772 
1773     while (nb_sectors > 0) {
1774         int n = nb_sectors;
1775         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1776 
1777         switch (status) {
1778         case BLK_BACKING_FILE:
1779             /* If we have a backing file, leave clusters unallocated that are
1780              * unallocated in the source image, so that the backing file is
1781              * visible at the respective offset. */
1782             assert(s->target_has_backing);
1783             break;
1784 
1785         case BLK_DATA:
1786             /* If we're told to keep the target fully allocated (-S 0) or there
1787              * is real non-zero data, we must write it. Otherwise we can treat
1788              * it as zero sectors.
1789              * Compressed clusters need to be written as a whole, so in that
1790              * case we can only save the write if the buffer is completely
1791              * zeroed. */
1792             if (!s->min_sparse ||
1793                 (!s->compressed &&
1794                  is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1795                                           sector_num, s->alignment)) ||
1796                 (s->compressed &&
1797                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1798             {
1799                 ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1800                                     n << BDRV_SECTOR_BITS, buf, flags);
1801                 if (ret < 0) {
1802                     return ret;
1803                 }
1804                 break;
1805             }
1806             /* fall-through */
1807 
1808         case BLK_ZERO:
1809             if (s->has_zero_init) {
1810                 assert(!s->target_has_backing);
1811                 break;
1812             }
1813             ret = blk_co_pwrite_zeroes(s->target,
1814                                        sector_num << BDRV_SECTOR_BITS,
1815                                        n << BDRV_SECTOR_BITS,
1816                                        BDRV_REQ_MAY_UNMAP);
1817             if (ret < 0) {
1818                 return ret;
1819             }
1820             break;
1821         }
1822 
1823         sector_num += n;
1824         nb_sectors -= n;
1825         buf += n * BDRV_SECTOR_SIZE;
1826     }
1827 
1828     return 0;
1829 }
1830 
1831 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1832                                               int nb_sectors)
1833 {
1834     int n, ret;
1835 
1836     while (nb_sectors > 0) {
1837         BlockBackend *blk;
1838         int src_cur;
1839         int64_t bs_sectors, src_cur_offset;
1840         int64_t offset;
1841 
1842         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1843         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1844         blk = s->src[src_cur];
1845         bs_sectors = s->src_sectors[src_cur];
1846 
1847         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1848 
1849         ret = blk_co_copy_range(blk, offset, s->target,
1850                                 sector_num << BDRV_SECTOR_BITS,
1851                                 n << BDRV_SECTOR_BITS, 0, 0);
1852         if (ret < 0) {
1853             return ret;
1854         }
1855 
1856         sector_num += n;
1857         nb_sectors -= n;
1858     }
1859     return 0;
1860 }
1861 
1862 static void coroutine_fn convert_co_do_copy(void *opaque)
1863 {
1864     ImgConvertState *s = opaque;
1865     uint8_t *buf = NULL;
1866     int ret, i;
1867     int index = -1;
1868 
1869     for (i = 0; i < s->num_coroutines; i++) {
1870         if (s->co[i] == qemu_coroutine_self()) {
1871             index = i;
1872             break;
1873         }
1874     }
1875     assert(index >= 0);
1876 
1877     s->running_coroutines++;
1878     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1879 
1880     while (1) {
1881         int n;
1882         int64_t sector_num;
1883         enum ImgConvertBlockStatus status;
1884         bool copy_range;
1885 
1886         qemu_co_mutex_lock(&s->lock);
1887         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1888             qemu_co_mutex_unlock(&s->lock);
1889             break;
1890         }
1891         n = convert_iteration_sectors(s, s->sector_num);
1892         if (n < 0) {
1893             qemu_co_mutex_unlock(&s->lock);
1894             s->ret = n;
1895             break;
1896         }
1897         /* save current sector and allocation status to local variables */
1898         sector_num = s->sector_num;
1899         status = s->status;
1900         if (!s->min_sparse && s->status == BLK_ZERO) {
1901             n = MIN(n, s->buf_sectors);
1902         }
1903         /* increment global sector counter so that other coroutines can
1904          * already continue reading beyond this request */
1905         s->sector_num += n;
1906         qemu_co_mutex_unlock(&s->lock);
1907 
1908         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
1909             s->allocated_done += n;
1910             qemu_progress_print(100.0 * s->allocated_done /
1911                                         s->allocated_sectors, 0);
1912         }
1913 
1914 retry:
1915         copy_range = s->copy_range && s->status == BLK_DATA;
1916         if (status == BLK_DATA && !copy_range) {
1917             ret = convert_co_read(s, sector_num, n, buf);
1918             if (ret < 0) {
1919                 error_report("error while reading sector %" PRId64
1920                              ": %s", sector_num, strerror(-ret));
1921                 s->ret = ret;
1922             }
1923         } else if (!s->min_sparse && status == BLK_ZERO) {
1924             status = BLK_DATA;
1925             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
1926         }
1927 
1928         if (s->wr_in_order) {
1929             /* keep writes in order */
1930             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
1931                 s->wait_sector_num[index] = sector_num;
1932                 qemu_coroutine_yield();
1933             }
1934             s->wait_sector_num[index] = -1;
1935         }
1936 
1937         if (s->ret == -EINPROGRESS) {
1938             if (copy_range) {
1939                 ret = convert_co_copy_range(s, sector_num, n);
1940                 if (ret) {
1941                     s->copy_range = false;
1942                     goto retry;
1943                 }
1944             } else {
1945                 ret = convert_co_write(s, sector_num, n, buf, status);
1946             }
1947             if (ret < 0) {
1948                 error_report("error while writing sector %" PRId64
1949                              ": %s", sector_num, strerror(-ret));
1950                 s->ret = ret;
1951             }
1952         }
1953 
1954         if (s->wr_in_order) {
1955             /* reenter the coroutine that might have waited
1956              * for this write to complete */
1957             s->wr_offs = sector_num + n;
1958             for (i = 0; i < s->num_coroutines; i++) {
1959                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
1960                     /*
1961                      * A -> B -> A cannot occur because A has
1962                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
1963                      * B will never enter A during this time window.
1964                      */
1965                     qemu_coroutine_enter(s->co[i]);
1966                     break;
1967                 }
1968             }
1969         }
1970     }
1971 
1972     qemu_vfree(buf);
1973     s->co[index] = NULL;
1974     s->running_coroutines--;
1975     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
1976         /* the convert job finished successfully */
1977         s->ret = 0;
1978     }
1979 }
1980 
1981 static int convert_do_copy(ImgConvertState *s)
1982 {
1983     int ret, i, n;
1984     int64_t sector_num = 0;
1985 
1986     /* Check whether we have zero initialisation or can get it efficiently */
1987     if (s->target_is_new && s->min_sparse && !s->target_has_backing) {
1988         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
1989     } else {
1990         s->has_zero_init = false;
1991     }
1992 
1993     if (!s->has_zero_init && !s->target_has_backing &&
1994         bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
1995     {
1996         ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
1997         if (ret == 0) {
1998             s->has_zero_init = true;
1999         }
2000     }
2001 
2002     /* Allocate buffer for copied data. For compressed images, only one cluster
2003      * can be copied at a time. */
2004     if (s->compressed) {
2005         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2006             error_report("invalid cluster size");
2007             return -EINVAL;
2008         }
2009         s->buf_sectors = s->cluster_sectors;
2010     }
2011 
2012     while (sector_num < s->total_sectors) {
2013         n = convert_iteration_sectors(s, sector_num);
2014         if (n < 0) {
2015             return n;
2016         }
2017         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2018         {
2019             s->allocated_sectors += n;
2020         }
2021         sector_num += n;
2022     }
2023 
2024     /* Do the copy */
2025     s->sector_next_status = 0;
2026     s->ret = -EINPROGRESS;
2027 
2028     qemu_co_mutex_init(&s->lock);
2029     for (i = 0; i < s->num_coroutines; i++) {
2030         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2031         s->wait_sector_num[i] = -1;
2032         qemu_coroutine_enter(s->co[i]);
2033     }
2034 
2035     while (s->running_coroutines) {
2036         main_loop_wait(false);
2037     }
2038 
2039     if (s->compressed && !s->ret) {
2040         /* signal EOF to align */
2041         ret = blk_pwrite_compressed(s->target, 0, NULL, 0);
2042         if (ret < 0) {
2043             return ret;
2044         }
2045     }
2046 
2047     return s->ret;
2048 }
2049 
/* Upper limit, in sectors of BDRV_SECTOR_SIZE bytes, for the copy buffer
 * (ImgConvertState.buf_sectors) and for the value accepted by "convert -S". */
#define MAX_BUF_SECTORS 32768
2051 
2052 static int img_convert(int argc, char **argv)
2053 {
2054     int c, bs_i, flags, src_flags = 0;
2055     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2056                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2057                *out_filename, *out_baseimg_param, *snapshot_name = NULL;
2058     BlockDriver *drv = NULL, *proto_drv = NULL;
2059     BlockDriverInfo bdi;
2060     BlockDriverState *out_bs;
2061     QemuOpts *opts = NULL, *sn_opts = NULL;
2062     QemuOptsList *create_opts = NULL;
2063     QDict *open_opts = NULL;
2064     char *options = NULL;
2065     Error *local_err = NULL;
2066     bool writethrough, src_writethrough, image_opts = false,
2067          skip_create = false, progress = false, tgt_image_opts = false;
2068     int64_t ret = -EINVAL;
2069     bool force_share = false;
2070     bool explict_min_sparse = false;
2071 
2072     ImgConvertState s = (ImgConvertState) {
2073         /* Need at least 4k of zeros for sparse detection */
2074         .min_sparse         = 8,
2075         .copy_range         = false,
2076         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2077         .wr_in_order        = true,
2078         .num_coroutines     = 8,
2079     };
2080 
2081     for(;;) {
2082         static const struct option long_options[] = {
2083             {"help", no_argument, 0, 'h'},
2084             {"object", required_argument, 0, OPTION_OBJECT},
2085             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2086             {"force-share", no_argument, 0, 'U'},
2087             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2088             {"salvage", no_argument, 0, OPTION_SALVAGE},
2089             {0, 0, 0, 0}
2090         };
2091         c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WU",
2092                         long_options, NULL);
2093         if (c == -1) {
2094             break;
2095         }
2096         switch(c) {
2097         case ':':
2098             missing_argument(argv[optind - 1]);
2099             break;
2100         case '?':
2101             unrecognized_option(argv[optind - 1]);
2102             break;
2103         case 'h':
2104             help();
2105             break;
2106         case 'f':
2107             fmt = optarg;
2108             break;
2109         case 'O':
2110             out_fmt = optarg;
2111             break;
2112         case 'B':
2113             out_baseimg = optarg;
2114             break;
2115         case 'C':
2116             s.copy_range = true;
2117             break;
2118         case 'c':
2119             s.compressed = true;
2120             break;
2121         case 'o':
2122             if (!is_valid_option_list(optarg)) {
2123                 error_report("Invalid option list: %s", optarg);
2124                 goto fail_getopt;
2125             }
2126             if (!options) {
2127                 options = g_strdup(optarg);
2128             } else {
2129                 char *old_options = options;
2130                 options = g_strdup_printf("%s,%s", options, optarg);
2131                 g_free(old_options);
2132             }
2133             break;
2134         case 'l':
2135             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2136                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2137                                                   optarg, false);
2138                 if (!sn_opts) {
2139                     error_report("Failed in parsing snapshot param '%s'",
2140                                  optarg);
2141                     goto fail_getopt;
2142                 }
2143             } else {
2144                 snapshot_name = optarg;
2145             }
2146             break;
2147         case 'S':
2148         {
2149             int64_t sval;
2150 
2151             sval = cvtnum(optarg);
2152             if (sval < 0 || !QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2153                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2154                 error_report("Invalid buffer size for sparse output specified. "
2155                     "Valid sizes are multiples of %llu up to %llu. Select "
2156                     "0 to disable sparse detection (fully allocates output).",
2157                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2158                 goto fail_getopt;
2159             }
2160 
2161             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2162             explict_min_sparse = true;
2163             break;
2164         }
2165         case 'p':
2166             progress = true;
2167             break;
2168         case 't':
2169             cache = optarg;
2170             break;
2171         case 'T':
2172             src_cache = optarg;
2173             break;
2174         case 'q':
2175             s.quiet = true;
2176             break;
2177         case 'n':
2178             skip_create = true;
2179             break;
2180         case 'm':
2181             if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2182                 s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2183                 error_report("Invalid number of coroutines. Allowed number of"
2184                              " coroutines is between 1 and %d", MAX_COROUTINES);
2185                 goto fail_getopt;
2186             }
2187             break;
2188         case 'W':
2189             s.wr_in_order = false;
2190             break;
2191         case 'U':
2192             force_share = true;
2193             break;
2194         case OPTION_OBJECT: {
2195             QemuOpts *object_opts;
2196             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
2197                                                   optarg, true);
2198             if (!object_opts) {
2199                 goto fail_getopt;
2200             }
2201             break;
2202         }
2203         case OPTION_IMAGE_OPTS:
2204             image_opts = true;
2205             break;
2206         case OPTION_SALVAGE:
2207             s.salvage = true;
2208             break;
2209         case OPTION_TARGET_IMAGE_OPTS:
2210             tgt_image_opts = true;
2211             break;
2212         }
2213     }
2214 
2215     if (!out_fmt && !tgt_image_opts) {
2216         out_fmt = "raw";
2217     }
2218 
2219     if (qemu_opts_foreach(&qemu_object_opts,
2220                           user_creatable_add_opts_foreach,
2221                           qemu_img_object_print_help, &error_fatal)) {
2222         goto fail_getopt;
2223     }
2224 
2225     if (s.compressed && s.copy_range) {
2226         error_report("Cannot enable copy offloading when -c is used");
2227         goto fail_getopt;
2228     }
2229 
2230     if (explict_min_sparse && s.copy_range) {
2231         error_report("Cannot enable copy offloading when -S is used");
2232         goto fail_getopt;
2233     }
2234 
2235     if (s.copy_range && s.salvage) {
2236         error_report("Cannot use copy offloading in salvaging mode");
2237         goto fail_getopt;
2238     }
2239 
2240     if (tgt_image_opts && !skip_create) {
2241         error_report("--target-image-opts requires use of -n flag");
2242         goto fail_getopt;
2243     }
2244 
2245     if (skip_create && options) {
2246         warn_report("-o has no effect when skipping image creation");
2247         warn_report("This will become an error in future QEMU versions.");
2248     }
2249 
2250     s.src_num = argc - optind - 1;
2251     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2252 
2253     if (options && has_help_option(options)) {
2254         if (out_fmt) {
2255             ret = print_block_option_help(out_filename, out_fmt);
2256             goto fail_getopt;
2257         } else {
2258             error_report("Option help requires a format be specified");
2259             goto fail_getopt;
2260         }
2261     }
2262 
2263     if (s.src_num < 1) {
2264         error_report("Must specify image file name");
2265         goto fail_getopt;
2266     }
2267 
2268 
2269     /* ret is still -EINVAL until here */
2270     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2271     if (ret < 0) {
2272         error_report("Invalid source cache option: %s", src_cache);
2273         goto fail_getopt;
2274     }
2275 
2276     /* Initialize before goto out */
2277     if (s.quiet) {
2278         progress = false;
2279     }
2280     qemu_progress_init(progress, 1.0);
2281     qemu_progress_print(0, 100);
2282 
2283     s.src = g_new0(BlockBackend *, s.src_num);
2284     s.src_sectors = g_new(int64_t, s.src_num);
2285 
2286     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2287         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2288                                fmt, src_flags, src_writethrough, s.quiet,
2289                                force_share);
2290         if (!s.src[bs_i]) {
2291             ret = -1;
2292             goto out;
2293         }
2294         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2295         if (s.src_sectors[bs_i] < 0) {
2296             error_report("Could not get size of %s: %s",
2297                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2298             ret = -1;
2299             goto out;
2300         }
2301         s.total_sectors += s.src_sectors[bs_i];
2302     }
2303 
2304     if (sn_opts) {
2305         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2306                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2307                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2308                                &local_err);
2309     } else if (snapshot_name != NULL) {
2310         if (s.src_num > 1) {
2311             error_report("No support for concatenating multiple snapshot");
2312             ret = -1;
2313             goto out;
2314         }
2315 
2316         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2317                                              &local_err);
2318     }
2319     if (local_err) {
2320         error_reportf_err(local_err, "Failed to load snapshot: ");
2321         ret = -1;
2322         goto out;
2323     }
2324 
2325     if (!skip_create) {
2326         /* Find driver and parse its options */
2327         drv = bdrv_find_format(out_fmt);
2328         if (!drv) {
2329             error_report("Unknown file format '%s'", out_fmt);
2330             ret = -1;
2331             goto out;
2332         }
2333 
2334         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2335         if (!proto_drv) {
2336             error_report_err(local_err);
2337             ret = -1;
2338             goto out;
2339         }
2340 
2341         if (!drv->create_opts) {
2342             error_report("Format driver '%s' does not support image creation",
2343                          drv->format_name);
2344             ret = -1;
2345             goto out;
2346         }
2347 
2348         if (!proto_drv->create_opts) {
2349             error_report("Protocol driver '%s' does not support image creation",
2350                          proto_drv->format_name);
2351             ret = -1;
2352             goto out;
2353         }
2354 
2355         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2356         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2357 
2358         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2359         if (options) {
2360             qemu_opts_do_parse(opts, options, NULL, &local_err);
2361             if (local_err) {
2362                 error_report_err(local_err);
2363                 ret = -1;
2364                 goto out;
2365             }
2366         }
2367 
2368         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s.total_sectors * 512,
2369                             &error_abort);
2370         ret = add_old_style_options(out_fmt, opts, out_baseimg, NULL);
2371         if (ret < 0) {
2372             goto out;
2373         }
2374     }
2375 
2376     /* Get backing file name if -o backing_file was used */
2377     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2378     if (out_baseimg_param) {
2379         out_baseimg = out_baseimg_param;
2380     }
2381     s.target_has_backing = (bool) out_baseimg;
2382 
2383     if (s.src_num > 1 && out_baseimg) {
2384         error_report("Having a backing file for the target makes no sense when "
2385                      "concatenating multiple input images");
2386         ret = -1;
2387         goto out;
2388     }
2389 
2390     /* Check if compression is supported */
2391     if (s.compressed) {
2392         bool encryption =
2393             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2394         const char *encryptfmt =
2395             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2396         const char *preallocation =
2397             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2398 
2399         if (drv && !block_driver_can_compress(drv)) {
2400             error_report("Compression not supported for this file format");
2401             ret = -1;
2402             goto out;
2403         }
2404 
2405         if (encryption || encryptfmt) {
2406             error_report("Compression and encryption not supported at "
2407                          "the same time");
2408             ret = -1;
2409             goto out;
2410         }
2411 
2412         if (preallocation
2413             && strcmp(preallocation, "off"))
2414         {
2415             error_report("Compression and preallocation not supported at "
2416                          "the same time");
2417             ret = -1;
2418             goto out;
2419         }
2420     }
2421 
2422     /*
2423      * The later open call will need any decryption secrets, and
2424      * bdrv_create() will purge "opts", so extract them now before
2425      * they are lost.
2426      */
2427     if (!skip_create) {
2428         open_opts = qdict_new();
2429         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2430     }
2431 
2432     if (!skip_create) {
2433         /* Create the new image */
2434         ret = bdrv_create(drv, out_filename, opts, &local_err);
2435         if (ret < 0) {
2436             error_reportf_err(local_err, "%s: error while converting %s: ",
2437                               out_filename, out_fmt);
2438             goto out;
2439         }
2440     }
2441 
2442     s.target_is_new = !skip_create;
2443 
2444     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2445     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2446     if (ret < 0) {
2447         error_report("Invalid cache option: %s", cache);
2448         goto out;
2449     }
2450 
2451     if (skip_create) {
2452         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2453                             flags, writethrough, s.quiet, false);
2454     } else {
2455         /* TODO ultimately we should allow --target-image-opts
2456          * to be used even when -n is not given.
2457          * That has to wait for bdrv_create to be improved
2458          * to allow filenames in option syntax
2459          */
2460         s.target = img_open_file(out_filename, open_opts, out_fmt,
2461                                  flags, writethrough, s.quiet, false);
2462         open_opts = NULL; /* blk_new_open will have freed it */
2463     }
2464     if (!s.target) {
2465         ret = -1;
2466         goto out;
2467     }
2468     out_bs = blk_bs(s.target);
2469 
2470     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2471         error_report("Compression not supported for this file format");
2472         ret = -1;
2473         goto out;
2474     }
2475 
2476     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2477      * or discard_alignment of the out_bs is greater. Limit to
2478      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2479     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2480                         MAX(s.buf_sectors,
2481                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2482                                 out_bs->bl.pdiscard_alignment >>
2483                                 BDRV_SECTOR_BITS)));
2484 
2485     /* try to align the write requests to the destination to avoid unnecessary
2486      * RMW cycles. */
2487     s.alignment = MAX(pow2floor(s.min_sparse),
2488                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2489                                    BDRV_SECTOR_SIZE));
2490     assert(is_power_of_2(s.alignment));
2491 
2492     if (skip_create) {
2493         int64_t output_sectors = blk_nb_sectors(s.target);
2494         if (output_sectors < 0) {
2495             error_report("unable to get output image length: %s",
2496                          strerror(-output_sectors));
2497             ret = -1;
2498             goto out;
2499         } else if (output_sectors < s.total_sectors) {
2500             error_report("output file is smaller than input file");
2501             ret = -1;
2502             goto out;
2503         }
2504     }
2505 
2506     if (s.target_has_backing) {
2507         /* Errors are treated as "backing length unknown" (which means
2508          * s.target_backing_sectors has to be negative, which it will
2509          * be automatically).  The backing file length is used only
2510          * for optimizations, so such a case is not fatal. */
2511         s.target_backing_sectors = bdrv_nb_sectors(out_bs->backing->bs);
2512     } else {
2513         s.target_backing_sectors = -1;
2514     }
2515 
2516     ret = bdrv_get_info(out_bs, &bdi);
2517     if (ret < 0) {
2518         if (s.compressed) {
2519             error_report("could not get block driver info");
2520             goto out;
2521         }
2522     } else {
2523         s.compressed = s.compressed || bdi.needs_compressed_writes;
2524         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2525         s.unallocated_blocks_are_zero = bdi.unallocated_blocks_are_zero;
2526     }
2527 
2528     ret = convert_do_copy(&s);
2529 out:
2530     if (!ret) {
2531         qemu_progress_print(100, 0);
2532     }
2533     qemu_progress_end();
2534     qemu_opts_del(opts);
2535     qemu_opts_free(create_opts);
2536     qemu_opts_del(sn_opts);
2537     qobject_unref(open_opts);
2538     blk_unref(s.target);
2539     if (s.src) {
2540         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2541             blk_unref(s.src[bs_i]);
2542         }
2543         g_free(s.src);
2544     }
2545     g_free(s.src_sectors);
2546 fail_getopt:
2547     g_free(options);
2548 
2549     return !!ret;
2550 }
2551 
2552 
2553 static void dump_snapshots(BlockDriverState *bs)
2554 {
2555     QEMUSnapshotInfo *sn_tab, *sn;
2556     int nb_sns, i;
2557 
2558     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2559     if (nb_sns <= 0)
2560         return;
2561     printf("Snapshot list:\n");
2562     bdrv_snapshot_dump(NULL);
2563     printf("\n");
2564     for(i = 0; i < nb_sns; i++) {
2565         sn = &sn_tab[i];
2566         bdrv_snapshot_dump(sn);
2567         printf("\n");
2568     }
2569     g_free(sn_tab);
2570 }
2571 
2572 static void dump_json_image_info_list(ImageInfoList *list)
2573 {
2574     QString *str;
2575     QObject *obj;
2576     Visitor *v = qobject_output_visitor_new(&obj);
2577 
2578     visit_type_ImageInfoList(v, NULL, &list, &error_abort);
2579     visit_complete(v, &obj);
2580     str = qobject_to_json_pretty(obj);
2581     assert(str != NULL);
2582     printf("%s\n", qstring_get_str(str));
2583     qobject_unref(obj);
2584     visit_free(v);
2585     qobject_unref(str);
2586 }
2587 
2588 static void dump_json_image_info(ImageInfo *info)
2589 {
2590     QString *str;
2591     QObject *obj;
2592     Visitor *v = qobject_output_visitor_new(&obj);
2593 
2594     visit_type_ImageInfo(v, NULL, &info, &error_abort);
2595     visit_complete(v, &obj);
2596     str = qobject_to_json_pretty(obj);
2597     assert(str != NULL);
2598     printf("%s\n", qstring_get_str(str));
2599     qobject_unref(obj);
2600     visit_free(v);
2601     qobject_unref(str);
2602 }
2603 
2604 static void dump_human_image_info_list(ImageInfoList *list)
2605 {
2606     ImageInfoList *elem;
2607     bool delim = false;
2608 
2609     for (elem = list; elem; elem = elem->next) {
2610         if (delim) {
2611             printf("\n");
2612         }
2613         delim = true;
2614 
2615         bdrv_image_info_dump(elem->value);
2616     }
2617 }
2618 
2619 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2620 {
2621     return strcmp(a, b) == 0;
2622 }
2623 
2624 /**
2625  * Open an image file chain and return an ImageInfoList
2626  *
2627  * @filename: topmost image filename
2628  * @fmt: topmost image format (may be NULL to autodetect)
2629  * @chain: true  - enumerate entire backing file chain
2630  *         false - only topmost image file
2631  *
2632  * Returns a list of ImageInfo objects or NULL if there was an error opening an
2633  * image file.  If there was an error a message will have been printed to
2634  * stderr.
2635  */
2636 static ImageInfoList *collect_image_info_list(bool image_opts,
2637                                               const char *filename,
2638                                               const char *fmt,
2639                                               bool chain, bool force_share)
2640 {
2641     ImageInfoList *head = NULL;
2642     ImageInfoList **last = &head;
2643     GHashTable *filenames;
2644     Error *err = NULL;
2645 
2646     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2647 
2648     while (filename) {
2649         BlockBackend *blk;
2650         BlockDriverState *bs;
2651         ImageInfo *info;
2652         ImageInfoList *elem;
2653 
2654         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2655             error_report("Backing file '%s' creates an infinite loop.",
2656                          filename);
2657             goto err;
2658         }
2659         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2660 
2661         blk = img_open(image_opts, filename, fmt,
2662                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
2663                        force_share);
2664         if (!blk) {
2665             goto err;
2666         }
2667         bs = blk_bs(blk);
2668 
2669         bdrv_query_image_info(bs, &info, &err);
2670         if (err) {
2671             error_report_err(err);
2672             blk_unref(blk);
2673             goto err;
2674         }
2675 
2676         elem = g_new0(ImageInfoList, 1);
2677         elem->value = info;
2678         *last = elem;
2679         last = &elem->next;
2680 
2681         blk_unref(blk);
2682 
2683         /* Clear parameters that only apply to the topmost image */
2684         filename = fmt = NULL;
2685         image_opts = false;
2686 
2687         if (chain) {
2688             if (info->has_full_backing_filename) {
2689                 filename = info->full_backing_filename;
2690             } else if (info->has_backing_filename) {
2691                 error_report("Could not determine absolute backing filename,"
2692                              " but backing filename '%s' present",
2693                              info->backing_filename);
2694                 goto err;
2695             }
2696             if (info->has_backing_filename_format) {
2697                 fmt = info->backing_filename_format;
2698             }
2699         }
2700     }
2701     g_hash_table_destroy(filenames);
2702     return head;
2703 
2704 err:
2705     qapi_free_ImageInfoList(head);
2706     g_hash_table_destroy(filenames);
2707     return NULL;
2708 }
2709 
2710 static int img_info(int argc, char **argv)
2711 {
2712     int c;
2713     OutputFormat output_format = OFORMAT_HUMAN;
2714     bool chain = false;
2715     const char *filename, *fmt, *output;
2716     ImageInfoList *list;
2717     bool image_opts = false;
2718     bool force_share = false;
2719 
2720     fmt = NULL;
2721     output = NULL;
2722     for(;;) {
2723         int option_index = 0;
2724         static const struct option long_options[] = {
2725             {"help", no_argument, 0, 'h'},
2726             {"format", required_argument, 0, 'f'},
2727             {"output", required_argument, 0, OPTION_OUTPUT},
2728             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
2729             {"object", required_argument, 0, OPTION_OBJECT},
2730             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2731             {"force-share", no_argument, 0, 'U'},
2732             {0, 0, 0, 0}
2733         };
2734         c = getopt_long(argc, argv, ":f:hU",
2735                         long_options, &option_index);
2736         if (c == -1) {
2737             break;
2738         }
2739         switch(c) {
2740         case ':':
2741             missing_argument(argv[optind - 1]);
2742             break;
2743         case '?':
2744             unrecognized_option(argv[optind - 1]);
2745             break;
2746         case 'h':
2747             help();
2748             break;
2749         case 'f':
2750             fmt = optarg;
2751             break;
2752         case 'U':
2753             force_share = true;
2754             break;
2755         case OPTION_OUTPUT:
2756             output = optarg;
2757             break;
2758         case OPTION_BACKING_CHAIN:
2759             chain = true;
2760             break;
2761         case OPTION_OBJECT: {
2762             QemuOpts *opts;
2763             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2764                                            optarg, true);
2765             if (!opts) {
2766                 return 1;
2767             }
2768         }   break;
2769         case OPTION_IMAGE_OPTS:
2770             image_opts = true;
2771             break;
2772         }
2773     }
2774     if (optind != argc - 1) {
2775         error_exit("Expecting one image file name");
2776     }
2777     filename = argv[optind++];
2778 
2779     if (output && !strcmp(output, "json")) {
2780         output_format = OFORMAT_JSON;
2781     } else if (output && !strcmp(output, "human")) {
2782         output_format = OFORMAT_HUMAN;
2783     } else if (output) {
2784         error_report("--output must be used with human or json as argument.");
2785         return 1;
2786     }
2787 
2788     if (qemu_opts_foreach(&qemu_object_opts,
2789                           user_creatable_add_opts_foreach,
2790                           qemu_img_object_print_help, &error_fatal)) {
2791         return 1;
2792     }
2793 
2794     list = collect_image_info_list(image_opts, filename, fmt, chain,
2795                                    force_share);
2796     if (!list) {
2797         return 1;
2798     }
2799 
2800     switch (output_format) {
2801     case OFORMAT_HUMAN:
2802         dump_human_image_info_list(list);
2803         break;
2804     case OFORMAT_JSON:
2805         if (chain) {
2806             dump_json_image_info_list(list);
2807         } else {
2808             dump_json_image_info(list->value);
2809         }
2810         break;
2811     }
2812 
2813     qapi_free_ImageInfoList(list);
2814     return 0;
2815 }
2816 
2817 static int dump_map_entry(OutputFormat output_format, MapEntry *e,
2818                           MapEntry *next)
2819 {
2820     switch (output_format) {
2821     case OFORMAT_HUMAN:
2822         if (e->data && !e->has_offset) {
2823             error_report("File contains external, encrypted or compressed clusters.");
2824             return -1;
2825         }
2826         if (e->data && !e->zero) {
2827             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
2828                    e->start, e->length,
2829                    e->has_offset ? e->offset : 0,
2830                    e->has_filename ? e->filename : "");
2831         }
2832         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
2833          * Modify the flags here to allow more coalescing.
2834          */
2835         if (next && (!next->data || next->zero)) {
2836             next->data = false;
2837             next->zero = true;
2838         }
2839         break;
2840     case OFORMAT_JSON:
2841         printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64","
2842                " \"depth\": %"PRId64", \"zero\": %s, \"data\": %s",
2843                (e->start == 0 ? "[" : ",\n"),
2844                e->start, e->length, e->depth,
2845                e->zero ? "true" : "false",
2846                e->data ? "true" : "false");
2847         if (e->has_offset) {
2848             printf(", \"offset\": %"PRId64"", e->offset);
2849         }
2850         putchar('}');
2851 
2852         if (!next) {
2853             printf("]\n");
2854         }
2855         break;
2856     }
2857     return 0;
2858 }
2859 
2860 static int get_block_status(BlockDriverState *bs, int64_t offset,
2861                             int64_t bytes, MapEntry *e)
2862 {
2863     int ret;
2864     int depth;
2865     BlockDriverState *file;
2866     bool has_offset;
2867     int64_t map;
2868     char *filename = NULL;
2869 
2870     /* As an optimization, we could cache the current range of unallocated
2871      * clusters in each file of the chain, and avoid querying the same
2872      * range repeatedly.
2873      */
2874 
2875     depth = 0;
2876     for (;;) {
2877         ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
2878         if (ret < 0) {
2879             return ret;
2880         }
2881         assert(bytes);
2882         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
2883             break;
2884         }
2885         bs = backing_bs(bs);
2886         if (bs == NULL) {
2887             ret = 0;
2888             break;
2889         }
2890 
2891         depth++;
2892     }
2893 
2894     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
2895 
2896     if (file && has_offset) {
2897         bdrv_refresh_filename(file);
2898         filename = file->filename;
2899     }
2900 
2901     *e = (MapEntry) {
2902         .start = offset,
2903         .length = bytes,
2904         .data = !!(ret & BDRV_BLOCK_DATA),
2905         .zero = !!(ret & BDRV_BLOCK_ZERO),
2906         .offset = map,
2907         .has_offset = has_offset,
2908         .depth = depth,
2909         .has_filename = filename,
2910         .filename = filename,
2911     };
2912 
2913     return 0;
2914 }
2915 
2916 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
2917 {
2918     if (curr->length == 0) {
2919         return false;
2920     }
2921     if (curr->zero != next->zero ||
2922         curr->data != next->data ||
2923         curr->depth != next->depth ||
2924         curr->has_filename != next->has_filename ||
2925         curr->has_offset != next->has_offset) {
2926         return false;
2927     }
2928     if (curr->has_filename && strcmp(curr->filename, next->filename)) {
2929         return false;
2930     }
2931     if (curr->has_offset && curr->offset + curr->length != next->offset) {
2932         return false;
2933     }
2934     return true;
2935 }
2936 
2937 static int img_map(int argc, char **argv)
2938 {
2939     int c;
2940     OutputFormat output_format = OFORMAT_HUMAN;
2941     BlockBackend *blk;
2942     BlockDriverState *bs;
2943     const char *filename, *fmt, *output;
2944     int64_t length;
2945     MapEntry curr = { .length = 0 }, next;
2946     int ret = 0;
2947     bool image_opts = false;
2948     bool force_share = false;
2949 
2950     fmt = NULL;
2951     output = NULL;
2952     for (;;) {
2953         int option_index = 0;
2954         static const struct option long_options[] = {
2955             {"help", no_argument, 0, 'h'},
2956             {"format", required_argument, 0, 'f'},
2957             {"output", required_argument, 0, OPTION_OUTPUT},
2958             {"object", required_argument, 0, OPTION_OBJECT},
2959             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2960             {"force-share", no_argument, 0, 'U'},
2961             {0, 0, 0, 0}
2962         };
2963         c = getopt_long(argc, argv, ":f:hU",
2964                         long_options, &option_index);
2965         if (c == -1) {
2966             break;
2967         }
2968         switch (c) {
2969         case ':':
2970             missing_argument(argv[optind - 1]);
2971             break;
2972         case '?':
2973             unrecognized_option(argv[optind - 1]);
2974             break;
2975         case 'h':
2976             help();
2977             break;
2978         case 'f':
2979             fmt = optarg;
2980             break;
2981         case 'U':
2982             force_share = true;
2983             break;
2984         case OPTION_OUTPUT:
2985             output = optarg;
2986             break;
2987         case OPTION_OBJECT: {
2988             QemuOpts *opts;
2989             opts = qemu_opts_parse_noisily(&qemu_object_opts,
2990                                            optarg, true);
2991             if (!opts) {
2992                 return 1;
2993             }
2994         }   break;
2995         case OPTION_IMAGE_OPTS:
2996             image_opts = true;
2997             break;
2998         }
2999     }
3000     if (optind != argc - 1) {
3001         error_exit("Expecting one image file name");
3002     }
3003     filename = argv[optind];
3004 
3005     if (output && !strcmp(output, "json")) {
3006         output_format = OFORMAT_JSON;
3007     } else if (output && !strcmp(output, "human")) {
3008         output_format = OFORMAT_HUMAN;
3009     } else if (output) {
3010         error_report("--output must be used with human or json as argument.");
3011         return 1;
3012     }
3013 
3014     if (qemu_opts_foreach(&qemu_object_opts,
3015                           user_creatable_add_opts_foreach,
3016                           qemu_img_object_print_help, &error_fatal)) {
3017         return 1;
3018     }
3019 
3020     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3021     if (!blk) {
3022         return 1;
3023     }
3024     bs = blk_bs(blk);
3025 
3026     if (output_format == OFORMAT_HUMAN) {
3027         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3028     }
3029 
3030     length = blk_getlength(blk);
3031     while (curr.start + curr.length < length) {
3032         int64_t offset = curr.start + curr.length;
3033         int64_t n;
3034 
3035         /* Probe up to 1 GiB at a time.  */
3036         n = MIN(1 * GiB, length - offset);
3037         ret = get_block_status(bs, offset, n, &next);
3038 
3039         if (ret < 0) {
3040             error_report("Could not read file metadata: %s", strerror(-ret));
3041             goto out;
3042         }
3043 
3044         if (entry_mergeable(&curr, &next)) {
3045             curr.length += next.length;
3046             continue;
3047         }
3048 
3049         if (curr.length > 0) {
3050             ret = dump_map_entry(output_format, &curr, &next);
3051             if (ret < 0) {
3052                 goto out;
3053             }
3054         }
3055         curr = next;
3056     }
3057 
3058     ret = dump_map_entry(output_format, &curr, NULL);
3059 
3060 out:
3061     blk_unref(blk);
3062     return ret < 0;
3063 }
3064 
/* Actions of the "snapshot" subcommand, selected in img_snapshot() */
enum {
    SNAPSHOT_LIST   = 1,    /* -l */
    SNAPSHOT_CREATE = 2,    /* -c */
    SNAPSHOT_APPLY  = 3,    /* -a */
    SNAPSHOT_DELETE = 4,    /* -d */
};
3069 
3070 static int img_snapshot(int argc, char **argv)
3071 {
3072     BlockBackend *blk;
3073     BlockDriverState *bs;
3074     QEMUSnapshotInfo sn;
3075     char *filename, *snapshot_name = NULL;
3076     int c, ret = 0, bdrv_oflags;
3077     int action = 0;
3078     qemu_timeval tv;
3079     bool quiet = false;
3080     Error *err = NULL;
3081     bool image_opts = false;
3082     bool force_share = false;
3083 
3084     bdrv_oflags = BDRV_O_RDWR;
3085     /* Parse commandline parameters */
3086     for(;;) {
3087         static const struct option long_options[] = {
3088             {"help", no_argument, 0, 'h'},
3089             {"object", required_argument, 0, OPTION_OBJECT},
3090             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3091             {"force-share", no_argument, 0, 'U'},
3092             {0, 0, 0, 0}
3093         };
3094         c = getopt_long(argc, argv, ":la:c:d:hqU",
3095                         long_options, NULL);
3096         if (c == -1) {
3097             break;
3098         }
3099         switch(c) {
3100         case ':':
3101             missing_argument(argv[optind - 1]);
3102             break;
3103         case '?':
3104             unrecognized_option(argv[optind - 1]);
3105             break;
3106         case 'h':
3107             help();
3108             return 0;
3109         case 'l':
3110             if (action) {
3111                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3112                 return 0;
3113             }
3114             action = SNAPSHOT_LIST;
3115             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
3116             break;
3117         case 'a':
3118             if (action) {
3119                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3120                 return 0;
3121             }
3122             action = SNAPSHOT_APPLY;
3123             snapshot_name = optarg;
3124             break;
3125         case 'c':
3126             if (action) {
3127                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3128                 return 0;
3129             }
3130             action = SNAPSHOT_CREATE;
3131             snapshot_name = optarg;
3132             break;
3133         case 'd':
3134             if (action) {
3135                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3136                 return 0;
3137             }
3138             action = SNAPSHOT_DELETE;
3139             snapshot_name = optarg;
3140             break;
3141         case 'q':
3142             quiet = true;
3143             break;
3144         case 'U':
3145             force_share = true;
3146             break;
3147         case OPTION_OBJECT: {
3148             QemuOpts *opts;
3149             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3150                                            optarg, true);
3151             if (!opts) {
3152                 return 1;
3153             }
3154         }   break;
3155         case OPTION_IMAGE_OPTS:
3156             image_opts = true;
3157             break;
3158         }
3159     }
3160 
3161     if (optind != argc - 1) {
3162         error_exit("Expecting one image file name");
3163     }
3164     filename = argv[optind++];
3165 
3166     if (qemu_opts_foreach(&qemu_object_opts,
3167                           user_creatable_add_opts_foreach,
3168                           qemu_img_object_print_help, &error_fatal)) {
3169         return 1;
3170     }
3171 
3172     /* Open the image */
3173     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
3174                    force_share);
3175     if (!blk) {
3176         return 1;
3177     }
3178     bs = blk_bs(blk);
3179 
3180     /* Perform the requested action */
3181     switch(action) {
3182     case SNAPSHOT_LIST:
3183         dump_snapshots(bs);
3184         break;
3185 
3186     case SNAPSHOT_CREATE:
3187         memset(&sn, 0, sizeof(sn));
3188         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3189 
3190         qemu_gettimeofday(&tv);
3191         sn.date_sec = tv.tv_sec;
3192         sn.date_nsec = tv.tv_usec * 1000;
3193 
3194         ret = bdrv_snapshot_create(bs, &sn);
3195         if (ret) {
3196             error_report("Could not create snapshot '%s': %d (%s)",
3197                 snapshot_name, ret, strerror(-ret));
3198         }
3199         break;
3200 
3201     case SNAPSHOT_APPLY:
3202         ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3203         if (ret) {
3204             error_reportf_err(err, "Could not apply snapshot '%s': ",
3205                               snapshot_name);
3206         }
3207         break;
3208 
3209     case SNAPSHOT_DELETE:
3210         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3211         if (ret < 0) {
3212             error_report("Could not delete snapshot '%s': snapshot not "
3213                          "found", snapshot_name);
3214             ret = 1;
3215         } else {
3216             ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3217             if (ret < 0) {
3218                 error_reportf_err(err, "Could not delete snapshot '%s': ",
3219                                   snapshot_name);
3220                 ret = 1;
3221             }
3222         }
3223         break;
3224     }
3225 
3226     /* Cleanup */
3227     blk_unref(blk);
3228     if (ret) {
3229         return 1;
3230     }
3231     return 0;
3232 }
3233 
3234 static int img_rebase(int argc, char **argv)
3235 {
3236     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3237     uint8_t *buf_old = NULL;
3238     uint8_t *buf_new = NULL;
3239     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3240     char *filename;
3241     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3242     int c, flags, src_flags, ret;
3243     bool writethrough, src_writethrough;
3244     int unsafe = 0;
3245     bool force_share = false;
3246     int progress = 0;
3247     bool quiet = false;
3248     Error *local_err = NULL;
3249     bool image_opts = false;
3250 
3251     /* Parse commandline parameters */
3252     fmt = NULL;
3253     cache = BDRV_DEFAULT_CACHE;
3254     src_cache = BDRV_DEFAULT_CACHE;
3255     out_baseimg = NULL;
3256     out_basefmt = NULL;
3257     for(;;) {
3258         static const struct option long_options[] = {
3259             {"help", no_argument, 0, 'h'},
3260             {"object", required_argument, 0, OPTION_OBJECT},
3261             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3262             {"force-share", no_argument, 0, 'U'},
3263             {0, 0, 0, 0}
3264         };
3265         c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
3266                         long_options, NULL);
3267         if (c == -1) {
3268             break;
3269         }
3270         switch(c) {
3271         case ':':
3272             missing_argument(argv[optind - 1]);
3273             break;
3274         case '?':
3275             unrecognized_option(argv[optind - 1]);
3276             break;
3277         case 'h':
3278             help();
3279             return 0;
3280         case 'f':
3281             fmt = optarg;
3282             break;
3283         case 'F':
3284             out_basefmt = optarg;
3285             break;
3286         case 'b':
3287             out_baseimg = optarg;
3288             break;
3289         case 'u':
3290             unsafe = 1;
3291             break;
3292         case 'p':
3293             progress = 1;
3294             break;
3295         case 't':
3296             cache = optarg;
3297             break;
3298         case 'T':
3299             src_cache = optarg;
3300             break;
3301         case 'q':
3302             quiet = true;
3303             break;
3304         case OPTION_OBJECT: {
3305             QemuOpts *opts;
3306             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3307                                            optarg, true);
3308             if (!opts) {
3309                 return 1;
3310             }
3311         }   break;
3312         case OPTION_IMAGE_OPTS:
3313             image_opts = true;
3314             break;
3315         case 'U':
3316             force_share = true;
3317             break;
3318         }
3319     }
3320 
3321     if (quiet) {
3322         progress = 0;
3323     }
3324 
3325     if (optind != argc - 1) {
3326         error_exit("Expecting one image file name");
3327     }
3328     if (!unsafe && !out_baseimg) {
3329         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3330     }
3331     filename = argv[optind++];
3332 
3333     if (qemu_opts_foreach(&qemu_object_opts,
3334                           user_creatable_add_opts_foreach,
3335                           qemu_img_object_print_help, &error_fatal)) {
3336         return 1;
3337     }
3338 
3339     qemu_progress_init(progress, 2.0);
3340     qemu_progress_print(0, 100);
3341 
3342     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3343     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3344     if (ret < 0) {
3345         error_report("Invalid cache option: %s", cache);
3346         goto out;
3347     }
3348 
3349     src_flags = 0;
3350     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3351     if (ret < 0) {
3352         error_report("Invalid source cache option: %s", src_cache);
3353         goto out;
3354     }
3355 
3356     /* The source files are opened read-only, don't care about WCE */
3357     assert((src_flags & BDRV_O_RDWR) == 0);
3358     (void) src_writethrough;
3359 
3360     /*
3361      * Open the images.
3362      *
3363      * Ignore the old backing file for unsafe rebase in case we want to correct
3364      * the reference to a renamed or moved backing file.
3365      */
3366     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3367                    false);
3368     if (!blk) {
3369         ret = -1;
3370         goto out;
3371     }
3372     bs = blk_bs(blk);
3373 
3374     if (out_basefmt != NULL) {
3375         if (bdrv_find_format(out_basefmt) == NULL) {
3376             error_report("Invalid format name: '%s'", out_basefmt);
3377             ret = -1;
3378             goto out;
3379         }
3380     }
3381 
3382     /* For safe rebasing we need to compare old and new backing file */
3383     if (!unsafe) {
3384         QDict *options = NULL;
3385         BlockDriverState *base_bs = backing_bs(bs);
3386 
3387         if (base_bs) {
3388             blk_old_backing = blk_new(qemu_get_aio_context(),
3389                                       BLK_PERM_CONSISTENT_READ,
3390                                       BLK_PERM_ALL);
3391             ret = blk_insert_bs(blk_old_backing, base_bs,
3392                                 &local_err);
3393             if (ret < 0) {
3394                 error_reportf_err(local_err,
3395                                   "Could not reuse old backing file '%s': ",
3396                                   base_bs->filename);
3397                 goto out;
3398             }
3399         } else {
3400             blk_old_backing = NULL;
3401         }
3402 
3403         if (out_baseimg[0]) {
3404             const char *overlay_filename;
3405             char *out_real_path;
3406 
3407             options = qdict_new();
3408             if (out_basefmt) {
3409                 qdict_put_str(options, "driver", out_basefmt);
3410             }
3411             if (force_share) {
3412                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3413             }
3414 
3415             bdrv_refresh_filename(bs);
3416             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3417                                                      : bs->filename;
3418             out_real_path =
3419                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
3420                                                              out_baseimg,
3421                                                              &local_err);
3422             if (local_err) {
3423                 qobject_unref(options);
3424                 error_reportf_err(local_err,
3425                                   "Could not resolve backing filename: ");
3426                 ret = -1;
3427                 goto out;
3428             }
3429 
3430             /*
3431              * Find out whether we rebase an image on top of a previous image
3432              * in its chain.
3433              */
3434             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3435             if (prefix_chain_bs) {
3436                 qobject_unref(options);
3437                 g_free(out_real_path);
3438 
3439                 blk_new_backing = blk_new(qemu_get_aio_context(),
3440                                           BLK_PERM_CONSISTENT_READ,
3441                                           BLK_PERM_ALL);
3442                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3443                                     &local_err);
3444                 if (ret < 0) {
3445                     error_reportf_err(local_err,
3446                                       "Could not reuse backing file '%s': ",
3447                                       out_baseimg);
3448                     goto out;
3449                 }
3450             } else {
3451                 blk_new_backing = blk_new_open(out_real_path, NULL,
3452                                                options, src_flags, &local_err);
3453                 g_free(out_real_path);
3454                 if (!blk_new_backing) {
3455                     error_reportf_err(local_err,
3456                                       "Could not open new backing file '%s': ",
3457                                       out_baseimg);
3458                     ret = -1;
3459                     goto out;
3460                 }
3461             }
3462         }
3463     }
3464 
3465     /*
3466      * Check each unallocated cluster in the COW file. If it is unallocated,
3467      * accesses go to the backing file. We must therefore compare this cluster
3468      * in the old and new backing file, and if they differ we need to copy it
3469      * from the old backing file into the COW file.
3470      *
3471      * If qemu-img crashes during this step, no harm is done. The content of
3472      * the image is the same as the original one at any time.
3473      */
3474     if (!unsafe) {
3475         int64_t size;
3476         int64_t old_backing_size = 0;
3477         int64_t new_backing_size = 0;
3478         uint64_t offset;
3479         int64_t n;
3480         float local_progress = 0;
3481 
3482         buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3483         buf_new = blk_blockalign(blk, IO_BUF_SIZE);
3484 
3485         size = blk_getlength(blk);
3486         if (size < 0) {
3487             error_report("Could not get size of '%s': %s",
3488                          filename, strerror(-size));
3489             ret = -1;
3490             goto out;
3491         }
3492         if (blk_old_backing) {
3493             old_backing_size = blk_getlength(blk_old_backing);
3494             if (old_backing_size < 0) {
3495                 char backing_name[PATH_MAX];
3496 
3497                 bdrv_get_backing_filename(bs, backing_name,
3498                                           sizeof(backing_name));
3499                 error_report("Could not get size of '%s': %s",
3500                              backing_name, strerror(-old_backing_size));
3501                 ret = -1;
3502                 goto out;
3503             }
3504         }
3505         if (blk_new_backing) {
3506             new_backing_size = blk_getlength(blk_new_backing);
3507             if (new_backing_size < 0) {
3508                 error_report("Could not get size of '%s': %s",
3509                              out_baseimg, strerror(-new_backing_size));
3510                 ret = -1;
3511                 goto out;
3512             }
3513         }
3514 
3515         if (size != 0) {
3516             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3517         }
3518 
3519         for (offset = 0; offset < size; offset += n) {
3520             bool buf_old_is_zero = false;
3521 
3522             /* How many bytes can we handle with the next read? */
3523             n = MIN(IO_BUF_SIZE, size - offset);
3524 
3525             /* If the cluster is allocated, we don't need to take action */
3526             ret = bdrv_is_allocated(bs, offset, n, &n);
3527             if (ret < 0) {
3528                 error_report("error while reading image metadata: %s",
3529                              strerror(-ret));
3530                 goto out;
3531             }
3532             if (ret) {
3533                 continue;
3534             }
3535 
3536             if (prefix_chain_bs) {
3537                 /*
3538                  * If cluster wasn't changed since prefix_chain, we don't need
3539                  * to take action
3540                  */
3541                 ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs,
3542                                               false, offset, n, &n);
3543                 if (ret < 0) {
3544                     error_report("error while reading image metadata: %s",
3545                                  strerror(-ret));
3546                     goto out;
3547                 }
3548                 if (!ret) {
3549                     continue;
3550                 }
3551             }
3552 
3553             /*
3554              * Read old and new backing file and take into consideration that
3555              * backing files may be smaller than the COW image.
3556              */
3557             if (offset >= old_backing_size) {
3558                 memset(buf_old, 0, n);
3559                 buf_old_is_zero = true;
3560             } else {
3561                 if (offset + n > old_backing_size) {
3562                     n = old_backing_size - offset;
3563                 }
3564 
3565                 ret = blk_pread(blk_old_backing, offset, buf_old, n);
3566                 if (ret < 0) {
3567                     error_report("error while reading from old backing file");
3568                     goto out;
3569                 }
3570             }
3571 
3572             if (offset >= new_backing_size || !blk_new_backing) {
3573                 memset(buf_new, 0, n);
3574             } else {
3575                 if (offset + n > new_backing_size) {
3576                     n = new_backing_size - offset;
3577                 }
3578 
3579                 ret = blk_pread(blk_new_backing, offset, buf_new, n);
3580                 if (ret < 0) {
3581                     error_report("error while reading from new backing file");
3582                     goto out;
3583                 }
3584             }
3585 
3586             /* If they differ, we need to write to the COW file */
3587             uint64_t written = 0;
3588 
3589             while (written < n) {
3590                 int64_t pnum;
3591 
3592                 if (compare_buffers(buf_old + written, buf_new + written,
3593                                     n - written, &pnum))
3594                 {
3595                     if (buf_old_is_zero) {
3596                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
3597                     } else {
3598                         ret = blk_pwrite(blk, offset + written,
3599                                          buf_old + written, pnum, 0);
3600                     }
3601                     if (ret < 0) {
3602                         error_report("Error while writing to COW image: %s",
3603                             strerror(-ret));
3604                         goto out;
3605                     }
3606                 }
3607 
3608                 written += pnum;
3609             }
3610             qemu_progress_print(local_progress, 100);
3611         }
3612     }
3613 
3614     /*
3615      * Change the backing file. All clusters that are different from the old
3616      * backing file are overwritten in the COW file now, so the visible content
3617      * doesn't change when we switch the backing file.
3618      */
3619     if (out_baseimg && *out_baseimg) {
3620         ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
3621     } else {
3622         ret = bdrv_change_backing_file(bs, NULL, NULL);
3623     }
3624 
3625     if (ret == -ENOSPC) {
3626         error_report("Could not change the backing file to '%s': No "
3627                      "space left in the file header", out_baseimg);
3628     } else if (ret < 0) {
3629         error_report("Could not change the backing file to '%s': %s",
3630             out_baseimg, strerror(-ret));
3631     }
3632 
3633     qemu_progress_print(100, 0);
3634     /*
3635      * TODO At this point it is possible to check if any clusters that are
3636      * allocated in the COW file are the same in the backing file. If so, they
3637      * could be dropped from the COW file. Don't do this before switching the
3638      * backing file, in case of a crash this would lead to corruption.
3639      */
3640 out:
3641     qemu_progress_end();
3642     /* Cleanup */
3643     if (!unsafe) {
3644         blk_unref(blk_old_backing);
3645         blk_unref(blk_new_backing);
3646     }
3647     qemu_vfree(buf_old);
3648     qemu_vfree(buf_new);
3649 
3650     blk_unref(blk);
3651     if (ret) {
3652         return 1;
3653     }
3654     return 0;
3655 }
3656 
3657 static int img_resize(int argc, char **argv)
3658 {
3659     Error *err = NULL;
3660     int c, ret, relative;
3661     const char *filename, *fmt, *size;
3662     int64_t n, total_size, current_size;
3663     bool quiet = false;
3664     BlockBackend *blk = NULL;
3665     PreallocMode prealloc = PREALLOC_MODE_OFF;
3666     QemuOpts *param;
3667 
3668     static QemuOptsList resize_options = {
3669         .name = "resize_options",
3670         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
3671         .desc = {
3672             {
3673                 .name = BLOCK_OPT_SIZE,
3674                 .type = QEMU_OPT_SIZE,
3675                 .help = "Virtual disk size"
3676             }, {
3677                 /* end of list */
3678             }
3679         },
3680     };
3681     bool image_opts = false;
3682     bool shrink = false;
3683 
3684     /* Remove size from argv manually so that negative numbers are not treated
3685      * as options by getopt. */
3686     if (argc < 3) {
3687         error_exit("Not enough arguments");
3688         return 1;
3689     }
3690 
3691     size = argv[--argc];
3692 
3693     /* Parse getopt arguments */
3694     fmt = NULL;
3695     for(;;) {
3696         static const struct option long_options[] = {
3697             {"help", no_argument, 0, 'h'},
3698             {"object", required_argument, 0, OPTION_OBJECT},
3699             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3700             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
3701             {"shrink", no_argument, 0, OPTION_SHRINK},
3702             {0, 0, 0, 0}
3703         };
3704         c = getopt_long(argc, argv, ":f:hq",
3705                         long_options, NULL);
3706         if (c == -1) {
3707             break;
3708         }
3709         switch(c) {
3710         case ':':
3711             missing_argument(argv[optind - 1]);
3712             break;
3713         case '?':
3714             unrecognized_option(argv[optind - 1]);
3715             break;
3716         case 'h':
3717             help();
3718             break;
3719         case 'f':
3720             fmt = optarg;
3721             break;
3722         case 'q':
3723             quiet = true;
3724             break;
3725         case OPTION_OBJECT: {
3726             QemuOpts *opts;
3727             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3728                                            optarg, true);
3729             if (!opts) {
3730                 return 1;
3731             }
3732         }   break;
3733         case OPTION_IMAGE_OPTS:
3734             image_opts = true;
3735             break;
3736         case OPTION_PREALLOCATION:
3737             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
3738                                        PREALLOC_MODE__MAX, NULL);
3739             if (prealloc == PREALLOC_MODE__MAX) {
3740                 error_report("Invalid preallocation mode '%s'", optarg);
3741                 return 1;
3742             }
3743             break;
3744         case OPTION_SHRINK:
3745             shrink = true;
3746             break;
3747         }
3748     }
3749     if (optind != argc - 1) {
3750         error_exit("Expecting image file name and size");
3751     }
3752     filename = argv[optind++];
3753 
3754     if (qemu_opts_foreach(&qemu_object_opts,
3755                           user_creatable_add_opts_foreach,
3756                           qemu_img_object_print_help, &error_fatal)) {
3757         return 1;
3758     }
3759 
3760     /* Choose grow, shrink, or absolute resize mode */
3761     switch (size[0]) {
3762     case '+':
3763         relative = 1;
3764         size++;
3765         break;
3766     case '-':
3767         relative = -1;
3768         size++;
3769         break;
3770     default:
3771         relative = 0;
3772         break;
3773     }
3774 
3775     /* Parse size */
3776     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
3777     qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err);
3778     if (err) {
3779         error_report_err(err);
3780         ret = -1;
3781         qemu_opts_del(param);
3782         goto out;
3783     }
3784     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
3785     qemu_opts_del(param);
3786 
3787     blk = img_open(image_opts, filename, fmt,
3788                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
3789                    false);
3790     if (!blk) {
3791         ret = -1;
3792         goto out;
3793     }
3794 
3795     current_size = blk_getlength(blk);
3796     if (current_size < 0) {
3797         error_report("Failed to inquire current image length: %s",
3798                      strerror(-current_size));
3799         ret = -1;
3800         goto out;
3801     }
3802 
3803     if (relative) {
3804         total_size = current_size + n * relative;
3805     } else {
3806         total_size = n;
3807     }
3808     if (total_size <= 0) {
3809         error_report("New image size must be positive");
3810         ret = -1;
3811         goto out;
3812     }
3813 
3814     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
3815         error_report("Preallocation can only be used for growing images");
3816         ret = -1;
3817         goto out;
3818     }
3819 
3820     if (total_size < current_size && !shrink) {
3821         warn_report("Shrinking an image will delete all data beyond the "
3822                     "shrunken image's end. Before performing such an "
3823                     "operation, make sure there is no important data there.");
3824 
3825         if (g_strcmp0(bdrv_get_format_name(blk_bs(blk)), "raw") != 0) {
3826             error_report(
3827               "Use the --shrink option to perform a shrink operation.");
3828             ret = -1;
3829             goto out;
3830         } else {
3831             warn_report("Using the --shrink option will suppress this message. "
3832                         "Note that future versions of qemu-img may refuse to "
3833                         "shrink images without this option.");
3834         }
3835     }
3836 
3837     /*
3838      * The user expects the image to have the desired size after
3839      * resizing, so pass @exact=true.  It is of no use to report
3840      * success when the image has not actually been resized.
3841      */
3842     ret = blk_truncate(blk, total_size, true, prealloc, &err);
3843     if (!ret) {
3844         qprintf(quiet, "Image resized.\n");
3845     } else {
3846         error_report_err(err);
3847     }
3848 out:
3849     blk_unref(blk);
3850     if (ret) {
3851         return 1;
3852     }
3853     return 0;
3854 }
3855 
3856 static void amend_status_cb(BlockDriverState *bs,
3857                             int64_t offset, int64_t total_work_size,
3858                             void *opaque)
3859 {
3860     qemu_progress_print(100.f * offset / total_work_size, 0);
3861 }
3862 
3863 static int print_amend_option_help(const char *format)
3864 {
3865     BlockDriver *drv;
3866 
3867     /* Find driver and parse its options */
3868     drv = bdrv_find_format(format);
3869     if (!drv) {
3870         error_report("Unknown file format '%s'", format);
3871         return 1;
3872     }
3873 
3874     if (!drv->bdrv_amend_options) {
3875         error_report("Format driver '%s' does not support option amendment",
3876                      format);
3877         return 1;
3878     }
3879 
3880     /* Every driver supporting amendment must have create_opts */
3881     assert(drv->create_opts);
3882 
3883     printf("Creation options for '%s':\n", format);
3884     qemu_opts_print_help(drv->create_opts, false);
3885     printf("\nNote that not all of these options may be amendable.\n");
3886     return 0;
3887 }
3888 
3889 static int img_amend(int argc, char **argv)
3890 {
3891     Error *err = NULL;
3892     int c, ret = 0;
3893     char *options = NULL;
3894     QemuOptsList *create_opts = NULL;
3895     QemuOpts *opts = NULL;
3896     const char *fmt = NULL, *filename, *cache;
3897     int flags;
3898     bool writethrough;
3899     bool quiet = false, progress = false;
3900     BlockBackend *blk = NULL;
3901     BlockDriverState *bs = NULL;
3902     bool image_opts = false;
3903 
3904     cache = BDRV_DEFAULT_CACHE;
3905     for (;;) {
3906         static const struct option long_options[] = {
3907             {"help", no_argument, 0, 'h'},
3908             {"object", required_argument, 0, OPTION_OBJECT},
3909             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3910             {0, 0, 0, 0}
3911         };
3912         c = getopt_long(argc, argv, ":ho:f:t:pq",
3913                         long_options, NULL);
3914         if (c == -1) {
3915             break;
3916         }
3917 
3918         switch (c) {
3919         case ':':
3920             missing_argument(argv[optind - 1]);
3921             break;
3922         case '?':
3923             unrecognized_option(argv[optind - 1]);
3924             break;
3925         case 'h':
3926             help();
3927             break;
3928         case 'o':
3929             if (!is_valid_option_list(optarg)) {
3930                 error_report("Invalid option list: %s", optarg);
3931                 ret = -1;
3932                 goto out_no_progress;
3933             }
3934             if (!options) {
3935                 options = g_strdup(optarg);
3936             } else {
3937                 char *old_options = options;
3938                 options = g_strdup_printf("%s,%s", options, optarg);
3939                 g_free(old_options);
3940             }
3941             break;
3942         case 'f':
3943             fmt = optarg;
3944             break;
3945         case 't':
3946             cache = optarg;
3947             break;
3948         case 'p':
3949             progress = true;
3950             break;
3951         case 'q':
3952             quiet = true;
3953             break;
3954         case OPTION_OBJECT:
3955             opts = qemu_opts_parse_noisily(&qemu_object_opts,
3956                                            optarg, true);
3957             if (!opts) {
3958                 ret = -1;
3959                 goto out_no_progress;
3960             }
3961             break;
3962         case OPTION_IMAGE_OPTS:
3963             image_opts = true;
3964             break;
3965         }
3966     }
3967 
3968     if (!options) {
3969         error_exit("Must specify options (-o)");
3970     }
3971 
3972     if (qemu_opts_foreach(&qemu_object_opts,
3973                           user_creatable_add_opts_foreach,
3974                           qemu_img_object_print_help, &error_fatal)) {
3975         ret = -1;
3976         goto out_no_progress;
3977     }
3978 
3979     if (quiet) {
3980         progress = false;
3981     }
3982     qemu_progress_init(progress, 1.0);
3983 
3984     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
3985     if (fmt && has_help_option(options)) {
3986         /* If a format is explicitly specified (and possibly no filename is
3987          * given), print option help here */
3988         ret = print_amend_option_help(fmt);
3989         goto out;
3990     }
3991 
3992     if (optind != argc - 1) {
3993         error_report("Expecting one image file name");
3994         ret = -1;
3995         goto out;
3996     }
3997 
3998     flags = BDRV_O_RDWR;
3999     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4000     if (ret < 0) {
4001         error_report("Invalid cache option: %s", cache);
4002         goto out;
4003     }
4004 
4005     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4006                    false);
4007     if (!blk) {
4008         ret = -1;
4009         goto out;
4010     }
4011     bs = blk_bs(blk);
4012 
4013     fmt = bs->drv->format_name;
4014 
4015     if (has_help_option(options)) {
4016         /* If the format was auto-detected, print option help here */
4017         ret = print_amend_option_help(fmt);
4018         goto out;
4019     }
4020 
4021     if (!bs->drv->bdrv_amend_options) {
4022         error_report("Format driver '%s' does not support option amendment",
4023                      fmt);
4024         ret = -1;
4025         goto out;
4026     }
4027 
4028     /* Every driver supporting amendment must have create_opts */
4029     assert(bs->drv->create_opts);
4030 
4031     create_opts = qemu_opts_append(create_opts, bs->drv->create_opts);
4032     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4033     qemu_opts_do_parse(opts, options, NULL, &err);
4034     if (err) {
4035         error_report_err(err);
4036         ret = -1;
4037         goto out;
4038     }
4039 
4040     /* In case the driver does not call amend_status_cb() */
4041     qemu_progress_print(0.f, 0);
4042     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, &err);
4043     qemu_progress_print(100.f, 0);
4044     if (ret < 0) {
4045         error_report_err(err);
4046         goto out;
4047     }
4048 
4049 out:
4050     qemu_progress_end();
4051 
4052 out_no_progress:
4053     blk_unref(blk);
4054     qemu_opts_del(opts);
4055     qemu_opts_free(create_opts);
4056     g_free(options);
4057 
4058     if (ret) {
4059         return 1;
4060     }
4061     return 0;
4062 }
4063 
4064 typedef struct BenchData {
4065     BlockBackend *blk;
4066     uint64_t image_size;
4067     bool write;
4068     int bufsize;
4069     int step;
4070     int nrreq;
4071     int n;
4072     int flush_interval;
4073     bool drain_on_flush;
4074     uint8_t *buf;
4075     QEMUIOVector *qiov;
4076 
4077     int in_flight;
4078     bool in_flush;
4079     uint64_t offset;
4080 } BenchData;
4081 
/*
 * Completion callback for flushes issued without draining the queue: it
 * only checks the result and terminates the process on failure.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4089 
4090 static void bench_cb(void *opaque, int ret)
4091 {
4092     BenchData *b = opaque;
4093     BlockAIOCB *acb;
4094 
4095     if (ret < 0) {
4096         error_report("Failed request: %s", strerror(-ret));
4097         exit(EXIT_FAILURE);
4098     }
4099 
4100     if (b->in_flush) {
4101         /* Just finished a flush with drained queue: Start next requests */
4102         assert(b->in_flight == 0);
4103         b->in_flush = false;
4104     } else if (b->in_flight > 0) {
4105         int remaining = b->n - b->in_flight;
4106 
4107         b->n--;
4108         b->in_flight--;
4109 
4110         /* Time for flush? Drain queue if requested, then flush */
4111         if (b->flush_interval && remaining % b->flush_interval == 0) {
4112             if (!b->in_flight || !b->drain_on_flush) {
4113                 BlockCompletionFunc *cb;
4114 
4115                 if (b->drain_on_flush) {
4116                     b->in_flush = true;
4117                     cb = bench_cb;
4118                 } else {
4119                     cb = bench_undrained_flush_cb;
4120                 }
4121 
4122                 acb = blk_aio_flush(b->blk, cb, b);
4123                 if (!acb) {
4124                     error_report("Failed to issue flush request");
4125                     exit(EXIT_FAILURE);
4126                 }
4127             }
4128             if (b->drain_on_flush) {
4129                 return;
4130             }
4131         }
4132     }
4133 
4134     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4135         int64_t offset = b->offset;
4136         /* blk_aio_* might look for completed I/Os and kick bench_cb
4137          * again, so make sure this operation is counted by in_flight
4138          * and b->offset is ready for the next submission.
4139          */
4140         b->in_flight++;
4141         b->offset += b->step;
4142         b->offset %= b->image_size;
4143         if (b->write) {
4144             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4145         } else {
4146             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4147         }
4148         if (!acb) {
4149             error_report("Failed to issue request");
4150             exit(EXIT_FAILURE);
4151         }
4152     }
4153 }
4154 
4155 static int img_bench(int argc, char **argv)
4156 {
4157     int c, ret = 0;
4158     const char *fmt = NULL, *filename;
4159     bool quiet = false;
4160     bool image_opts = false;
4161     bool is_write = false;
4162     int count = 75000;
4163     int depth = 64;
4164     int64_t offset = 0;
4165     size_t bufsize = 4096;
4166     int pattern = 0;
4167     size_t step = 0;
4168     int flush_interval = 0;
4169     bool drain_on_flush = true;
4170     int64_t image_size;
4171     BlockBackend *blk = NULL;
4172     BenchData data = {};
4173     int flags = 0;
4174     bool writethrough = false;
4175     struct timeval t1, t2;
4176     int i;
4177     bool force_share = false;
4178     size_t buf_size;
4179 
4180     for (;;) {
4181         static const struct option long_options[] = {
4182             {"help", no_argument, 0, 'h'},
4183             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4184             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4185             {"pattern", required_argument, 0, OPTION_PATTERN},
4186             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4187             {"force-share", no_argument, 0, 'U'},
4188             {0, 0, 0, 0}
4189         };
4190         c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:wU", long_options, NULL);
4191         if (c == -1) {
4192             break;
4193         }
4194 
4195         switch (c) {
4196         case ':':
4197             missing_argument(argv[optind - 1]);
4198             break;
4199         case '?':
4200             unrecognized_option(argv[optind - 1]);
4201             break;
4202         case 'h':
4203             help();
4204             break;
4205         case 'c':
4206         {
4207             unsigned long res;
4208 
4209             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4210                 error_report("Invalid request count specified");
4211                 return 1;
4212             }
4213             count = res;
4214             break;
4215         }
4216         case 'd':
4217         {
4218             unsigned long res;
4219 
4220             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4221                 error_report("Invalid queue depth specified");
4222                 return 1;
4223             }
4224             depth = res;
4225             break;
4226         }
4227         case 'f':
4228             fmt = optarg;
4229             break;
4230         case 'n':
4231             flags |= BDRV_O_NATIVE_AIO;
4232             break;
4233         case 'o':
4234         {
4235             offset = cvtnum(optarg);
4236             if (offset < 0) {
4237                 error_report("Invalid offset specified");
4238                 return 1;
4239             }
4240             break;
4241         }
4242             break;
4243         case 'q':
4244             quiet = true;
4245             break;
4246         case 's':
4247         {
4248             int64_t sval;
4249 
4250             sval = cvtnum(optarg);
4251             if (sval < 0 || sval > INT_MAX) {
4252                 error_report("Invalid buffer size specified");
4253                 return 1;
4254             }
4255 
4256             bufsize = sval;
4257             break;
4258         }
4259         case 'S':
4260         {
4261             int64_t sval;
4262 
4263             sval = cvtnum(optarg);
4264             if (sval < 0 || sval > INT_MAX) {
4265                 error_report("Invalid step size specified");
4266                 return 1;
4267             }
4268 
4269             step = sval;
4270             break;
4271         }
4272         case 't':
4273             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4274             if (ret < 0) {
4275                 error_report("Invalid cache mode");
4276                 ret = -1;
4277                 goto out;
4278             }
4279             break;
4280         case 'w':
4281             flags |= BDRV_O_RDWR;
4282             is_write = true;
4283             break;
4284         case 'U':
4285             force_share = true;
4286             break;
4287         case OPTION_PATTERN:
4288         {
4289             unsigned long res;
4290 
4291             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4292                 error_report("Invalid pattern byte specified");
4293                 return 1;
4294             }
4295             pattern = res;
4296             break;
4297         }
4298         case OPTION_FLUSH_INTERVAL:
4299         {
4300             unsigned long res;
4301 
4302             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4303                 error_report("Invalid flush interval specified");
4304                 return 1;
4305             }
4306             flush_interval = res;
4307             break;
4308         }
4309         case OPTION_NO_DRAIN:
4310             drain_on_flush = false;
4311             break;
4312         case OPTION_IMAGE_OPTS:
4313             image_opts = true;
4314             break;
4315         }
4316     }
4317 
4318     if (optind != argc - 1) {
4319         error_exit("Expecting one image file name");
4320     }
4321     filename = argv[argc - 1];
4322 
4323     if (!is_write && flush_interval) {
4324         error_report("--flush-interval is only available in write tests");
4325         ret = -1;
4326         goto out;
4327     }
4328     if (flush_interval && flush_interval < depth) {
4329         error_report("Flush interval can't be smaller than depth");
4330         ret = -1;
4331         goto out;
4332     }
4333 
4334     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4335                    force_share);
4336     if (!blk) {
4337         ret = -1;
4338         goto out;
4339     }
4340 
4341     image_size = blk_getlength(blk);
4342     if (image_size < 0) {
4343         ret = image_size;
4344         goto out;
4345     }
4346 
4347     data = (BenchData) {
4348         .blk            = blk,
4349         .image_size     = image_size,
4350         .bufsize        = bufsize,
4351         .step           = step ?: bufsize,
4352         .nrreq          = depth,
4353         .n              = count,
4354         .offset         = offset,
4355         .write          = is_write,
4356         .flush_interval = flush_interval,
4357         .drain_on_flush = drain_on_flush,
4358     };
4359     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4360            "(starting at offset %" PRId64 ", step size %d)\n",
4361            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4362            data.offset, data.step);
4363     if (flush_interval) {
4364         printf("Sending flush every %d requests\n", flush_interval);
4365     }
4366 
4367     buf_size = data.nrreq * data.bufsize;
4368     data.buf = blk_blockalign(blk, buf_size);
4369     memset(data.buf, pattern, data.nrreq * data.bufsize);
4370 
4371     blk_register_buf(blk, data.buf, buf_size);
4372 
4373     data.qiov = g_new(QEMUIOVector, data.nrreq);
4374     for (i = 0; i < data.nrreq; i++) {
4375         qemu_iovec_init(&data.qiov[i], 1);
4376         qemu_iovec_add(&data.qiov[i],
4377                        data.buf + i * data.bufsize, data.bufsize);
4378     }
4379 
4380     gettimeofday(&t1, NULL);
4381     bench_cb(&data, 0);
4382 
4383     while (data.n > 0) {
4384         main_loop_wait(false);
4385     }
4386     gettimeofday(&t2, NULL);
4387 
4388     printf("Run completed in %3.3f seconds.\n",
4389            (t2.tv_sec - t1.tv_sec)
4390            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4391 
4392 out:
4393     if (data.buf) {
4394         blk_unregister_buf(blk, data.buf);
4395     }
4396     qemu_vfree(data.buf);
4397     blk_unref(blk);
4398 
4399     if (ret) {
4400         return 1;
4401     }
4402     return 0;
4403 }
4404 
/* Bits recorded in DdInfo.flags, one per dd operand seen on the command line */
#define C_BS      0x01
#define C_COUNT   0x02
#define C_IF      0x04
#define C_OF      0x08
#define C_SKIP    0x10
4410 
/* Settings of a dd invocation that are not tied to the input or output side */
struct DdInfo {
    unsigned int flags;     /* bitmask of C_* values for operands given */
    int64_t count;          /* value of the "count=" operand */
};
4415 
/* Per-side (input or output) state of a dd invocation */
struct DdIo {
    int bsz;            /* Block size */
    char *filename;     /* copy of the "if=" / "of=" operand value */
    uint8_t *buf;
    int64_t offset;     /* for the input side: value of the "skip=" operand */
};
4422 
/* Table entry that maps a dd operand name to its handler */
struct DdOpts {
    /* operand name, i.e. the text before '=' */
    const char *name;
    /* handler: (operand value, input side, output side, dd info) -> 0 if ok */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    /* C_* bit to record once the handler has succeeded */
    unsigned int flag;
};
4428 
4429 static int img_dd_bs(const char *arg,
4430                      struct DdIo *in, struct DdIo *out,
4431                      struct DdInfo *dd)
4432 {
4433     int64_t res;
4434 
4435     res = cvtnum(arg);
4436 
4437     if (res <= 0 || res > INT_MAX) {
4438         error_report("invalid number: '%s'", arg);
4439         return 1;
4440     }
4441     in->bsz = out->bsz = res;
4442 
4443     return 0;
4444 }
4445 
4446 static int img_dd_count(const char *arg,
4447                         struct DdIo *in, struct DdIo *out,
4448                         struct DdInfo *dd)
4449 {
4450     dd->count = cvtnum(arg);
4451 
4452     if (dd->count < 0) {
4453         error_report("invalid number: '%s'", arg);
4454         return 1;
4455     }
4456 
4457     return 0;
4458 }
4459 
4460 static int img_dd_if(const char *arg,
4461                      struct DdIo *in, struct DdIo *out,
4462                      struct DdInfo *dd)
4463 {
4464     in->filename = g_strdup(arg);
4465 
4466     return 0;
4467 }
4468 
4469 static int img_dd_of(const char *arg,
4470                      struct DdIo *in, struct DdIo *out,
4471                      struct DdInfo *dd)
4472 {
4473     out->filename = g_strdup(arg);
4474 
4475     return 0;
4476 }
4477 
4478 static int img_dd_skip(const char *arg,
4479                        struct DdIo *in, struct DdIo *out,
4480                        struct DdInfo *dd)
4481 {
4482     in->offset = cvtnum(arg);
4483 
4484     if (in->offset < 0) {
4485         error_report("invalid number: '%s'", arg);
4486         return 1;
4487     }
4488 
4489     return 0;
4490 }
4491 
4492 static int img_dd(int argc, char **argv)
4493 {
4494     int ret = 0;
4495     char *arg = NULL;
4496     char *tmp;
4497     BlockDriver *drv = NULL, *proto_drv = NULL;
4498     BlockBackend *blk1 = NULL, *blk2 = NULL;
4499     QemuOpts *opts = NULL;
4500     QemuOptsList *create_opts = NULL;
4501     Error *local_err = NULL;
4502     bool image_opts = false;
4503     int c, i;
4504     const char *out_fmt = "raw";
4505     const char *fmt = NULL;
4506     int64_t size = 0;
4507     int64_t block_count = 0, out_pos, in_pos;
4508     bool force_share = false;
4509     struct DdInfo dd = {
4510         .flags = 0,
4511         .count = 0,
4512     };
4513     struct DdIo in = {
4514         .bsz = 512, /* Block size is by default 512 bytes */
4515         .filename = NULL,
4516         .buf = NULL,
4517         .offset = 0
4518     };
4519     struct DdIo out = {
4520         .bsz = 512,
4521         .filename = NULL,
4522         .buf = NULL,
4523         .offset = 0
4524     };
4525 
4526     const struct DdOpts options[] = {
4527         { "bs", img_dd_bs, C_BS },
4528         { "count", img_dd_count, C_COUNT },
4529         { "if", img_dd_if, C_IF },
4530         { "of", img_dd_of, C_OF },
4531         { "skip", img_dd_skip, C_SKIP },
4532         { NULL, NULL, 0 }
4533     };
4534     const struct option long_options[] = {
4535         { "help", no_argument, 0, 'h'},
4536         { "object", required_argument, 0, OPTION_OBJECT},
4537         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4538         { "force-share", no_argument, 0, 'U'},
4539         { 0, 0, 0, 0 }
4540     };
4541 
4542     while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
4543         if (c == EOF) {
4544             break;
4545         }
4546         switch (c) {
4547         case 'O':
4548             out_fmt = optarg;
4549             break;
4550         case 'f':
4551             fmt = optarg;
4552             break;
4553         case ':':
4554             missing_argument(argv[optind - 1]);
4555             break;
4556         case '?':
4557             unrecognized_option(argv[optind - 1]);
4558             break;
4559         case 'h':
4560             help();
4561             break;
4562         case 'U':
4563             force_share = true;
4564             break;
4565         case OPTION_OBJECT:
4566             if (!qemu_opts_parse_noisily(&qemu_object_opts, optarg, true)) {
4567                 ret = -1;
4568                 goto out;
4569             }
4570             break;
4571         case OPTION_IMAGE_OPTS:
4572             image_opts = true;
4573             break;
4574         }
4575     }
4576 
4577     for (i = optind; i < argc; i++) {
4578         int j;
4579         arg = g_strdup(argv[i]);
4580 
4581         tmp = strchr(arg, '=');
4582         if (tmp == NULL) {
4583             error_report("unrecognized operand %s", arg);
4584             ret = -1;
4585             goto out;
4586         }
4587 
4588         *tmp++ = '\0';
4589 
4590         for (j = 0; options[j].name != NULL; j++) {
4591             if (!strcmp(arg, options[j].name)) {
4592                 break;
4593             }
4594         }
4595         if (options[j].name == NULL) {
4596             error_report("unrecognized operand %s", arg);
4597             ret = -1;
4598             goto out;
4599         }
4600 
4601         if (options[j].f(tmp, &in, &out, &dd) != 0) {
4602             ret = -1;
4603             goto out;
4604         }
4605         dd.flags |= options[j].flag;
4606         g_free(arg);
4607         arg = NULL;
4608     }
4609 
4610     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
4611         error_report("Must specify both input and output files");
4612         ret = -1;
4613         goto out;
4614     }
4615 
4616     if (qemu_opts_foreach(&qemu_object_opts,
4617                           user_creatable_add_opts_foreach,
4618                           qemu_img_object_print_help, &error_fatal)) {
4619         ret = -1;
4620         goto out;
4621     }
4622 
4623     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
4624                     force_share);
4625 
4626     if (!blk1) {
4627         ret = -1;
4628         goto out;
4629     }
4630 
4631     drv = bdrv_find_format(out_fmt);
4632     if (!drv) {
4633         error_report("Unknown file format");
4634         ret = -1;
4635         goto out;
4636     }
4637     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
4638 
4639     if (!proto_drv) {
4640         error_report_err(local_err);
4641         ret = -1;
4642         goto out;
4643     }
4644     if (!drv->create_opts) {
4645         error_report("Format driver '%s' does not support image creation",
4646                      drv->format_name);
4647         ret = -1;
4648         goto out;
4649     }
4650     if (!proto_drv->create_opts) {
4651         error_report("Protocol driver '%s' does not support image creation",
4652                      proto_drv->format_name);
4653         ret = -1;
4654         goto out;
4655     }
4656     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4657     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
4658 
4659     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4660 
4661     size = blk_getlength(blk1);
4662     if (size < 0) {
4663         error_report("Failed to get size for '%s'", in.filename);
4664         ret = -1;
4665         goto out;
4666     }
4667 
4668     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
4669         dd.count * in.bsz < size) {
4670         size = dd.count * in.bsz;
4671     }
4672 
4673     /* Overflow means the specified offset is beyond input image's size */
4674     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4675                               size < in.bsz * in.offset)) {
4676         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
4677     } else {
4678         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
4679                             size - in.bsz * in.offset, &error_abort);
4680     }
4681 
4682     ret = bdrv_create(drv, out.filename, opts, &local_err);
4683     if (ret < 0) {
4684         error_reportf_err(local_err,
4685                           "%s: error while creating output image: ",
4686                           out.filename);
4687         ret = -1;
4688         goto out;
4689     }
4690 
4691     /* TODO, we can't honour --image-opts for the target,
4692      * since it needs to be given in a format compatible
4693      * with the bdrv_create() call above which does not
4694      * support image-opts style.
4695      */
4696     blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
4697                          false, false, false);
4698 
4699     if (!blk2) {
4700         ret = -1;
4701         goto out;
4702     }
4703 
4704     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
4705                               size < in.offset * in.bsz)) {
4706         /* We give a warning if the skip option is bigger than the input
4707          * size and create an empty output disk image (i.e. like dd(1)).
4708          */
4709         error_report("%s: cannot skip to specified offset", in.filename);
4710         in_pos = size;
4711     } else {
4712         in_pos = in.offset * in.bsz;
4713     }
4714 
4715     in.buf = g_new(uint8_t, in.bsz);
4716 
4717     for (out_pos = 0; in_pos < size; block_count++) {
4718         int in_ret, out_ret;
4719 
4720         if (in_pos + in.bsz > size) {
4721             in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos);
4722         } else {
4723             in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz);
4724         }
4725         if (in_ret < 0) {
4726             error_report("error while reading from input image file: %s",
4727                          strerror(-in_ret));
4728             ret = -1;
4729             goto out;
4730         }
4731         in_pos += in_ret;
4732 
4733         out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0);
4734 
4735         if (out_ret < 0) {
4736             error_report("error while writing to output image file: %s",
4737                          strerror(-out_ret));
4738             ret = -1;
4739             goto out;
4740         }
4741         out_pos += out_ret;
4742     }
4743 
4744 out:
4745     g_free(arg);
4746     qemu_opts_del(opts);
4747     qemu_opts_free(create_opts);
4748     blk_unref(blk1);
4749     blk_unref(blk2);
4750     g_free(in.filename);
4751     g_free(out.filename);
4752     g_free(in.buf);
4753     g_free(out.buf);
4754 
4755     if (ret) {
4756         return 1;
4757     }
4758     return 0;
4759 }
4760 
4761 static void dump_json_block_measure_info(BlockMeasureInfo *info)
4762 {
4763     QString *str;
4764     QObject *obj;
4765     Visitor *v = qobject_output_visitor_new(&obj);
4766 
4767     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
4768     visit_complete(v, &obj);
4769     str = qobject_to_json_pretty(obj);
4770     assert(str != NULL);
4771     printf("%s\n", qstring_get_str(str));
4772     qobject_unref(obj);
4773     visit_free(v);
4774     qobject_unref(str);
4775 }
4776 
4777 static int img_measure(int argc, char **argv)
4778 {
4779     static const struct option long_options[] = {
4780         {"help", no_argument, 0, 'h'},
4781         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4782         {"object", required_argument, 0, OPTION_OBJECT},
4783         {"output", required_argument, 0, OPTION_OUTPUT},
4784         {"size", required_argument, 0, OPTION_SIZE},
4785         {"force-share", no_argument, 0, 'U'},
4786         {0, 0, 0, 0}
4787     };
4788     OutputFormat output_format = OFORMAT_HUMAN;
4789     BlockBackend *in_blk = NULL;
4790     BlockDriver *drv;
4791     const char *filename = NULL;
4792     const char *fmt = NULL;
4793     const char *out_fmt = "raw";
4794     char *options = NULL;
4795     char *snapshot_name = NULL;
4796     bool force_share = false;
4797     QemuOpts *opts = NULL;
4798     QemuOpts *object_opts = NULL;
4799     QemuOpts *sn_opts = NULL;
4800     QemuOptsList *create_opts = NULL;
4801     bool image_opts = false;
4802     uint64_t img_size = UINT64_MAX;
4803     BlockMeasureInfo *info = NULL;
4804     Error *local_err = NULL;
4805     int ret = 1;
4806     int c;
4807 
4808     while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
4809                             long_options, NULL)) != -1) {
4810         switch (c) {
4811         case '?':
4812         case 'h':
4813             help();
4814             break;
4815         case 'f':
4816             fmt = optarg;
4817             break;
4818         case 'O':
4819             out_fmt = optarg;
4820             break;
4821         case 'o':
4822             if (!is_valid_option_list(optarg)) {
4823                 error_report("Invalid option list: %s", optarg);
4824                 goto out;
4825             }
4826             if (!options) {
4827                 options = g_strdup(optarg);
4828             } else {
4829                 char *old_options = options;
4830                 options = g_strdup_printf("%s,%s", options, optarg);
4831                 g_free(old_options);
4832             }
4833             break;
4834         case 'l':
4835             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
4836                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
4837                                                   optarg, false);
4838                 if (!sn_opts) {
4839                     error_report("Failed in parsing snapshot param '%s'",
4840                                  optarg);
4841                     goto out;
4842                 }
4843             } else {
4844                 snapshot_name = optarg;
4845             }
4846             break;
4847         case 'U':
4848             force_share = true;
4849             break;
4850         case OPTION_OBJECT:
4851             object_opts = qemu_opts_parse_noisily(&qemu_object_opts,
4852                                                   optarg, true);
4853             if (!object_opts) {
4854                 goto out;
4855             }
4856             break;
4857         case OPTION_IMAGE_OPTS:
4858             image_opts = true;
4859             break;
4860         case OPTION_OUTPUT:
4861             if (!strcmp(optarg, "json")) {
4862                 output_format = OFORMAT_JSON;
4863             } else if (!strcmp(optarg, "human")) {
4864                 output_format = OFORMAT_HUMAN;
4865             } else {
4866                 error_report("--output must be used with human or json "
4867                              "as argument.");
4868                 goto out;
4869             }
4870             break;
4871         case OPTION_SIZE:
4872         {
4873             int64_t sval;
4874 
4875             sval = cvtnum(optarg);
4876             if (sval < 0) {
4877                 if (sval == -ERANGE) {
4878                     error_report("Image size must be less than 8 EiB!");
4879                 } else {
4880                     error_report("Invalid image size specified! You may use "
4881                                  "k, M, G, T, P or E suffixes for ");
4882                     error_report("kilobytes, megabytes, gigabytes, terabytes, "
4883                                  "petabytes and exabytes.");
4884                 }
4885                 goto out;
4886             }
4887             img_size = (uint64_t)sval;
4888         }
4889         break;
4890         }
4891     }
4892 
4893     if (qemu_opts_foreach(&qemu_object_opts,
4894                           user_creatable_add_opts_foreach,
4895                           qemu_img_object_print_help, &error_fatal)) {
4896         goto out;
4897     }
4898 
4899     if (argc - optind > 1) {
4900         error_report("At most one filename argument is allowed.");
4901         goto out;
4902     } else if (argc - optind == 1) {
4903         filename = argv[optind];
4904     }
4905 
4906     if (!filename &&
4907         (object_opts || image_opts || fmt || snapshot_name || sn_opts)) {
4908         error_report("--object, --image-opts, -f, and -l "
4909                      "require a filename argument.");
4910         goto out;
4911     }
4912     if (filename && img_size != UINT64_MAX) {
4913         error_report("--size N cannot be used together with a filename.");
4914         goto out;
4915     }
4916     if (!filename && img_size == UINT64_MAX) {
4917         error_report("Either --size N or one filename must be specified.");
4918         goto out;
4919     }
4920 
4921     if (filename) {
4922         in_blk = img_open(image_opts, filename, fmt, 0,
4923                           false, false, force_share);
4924         if (!in_blk) {
4925             goto out;
4926         }
4927 
4928         if (sn_opts) {
4929             bdrv_snapshot_load_tmp(blk_bs(in_blk),
4930                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
4931                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
4932                     &local_err);
4933         } else if (snapshot_name != NULL) {
4934             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
4935                     snapshot_name, &local_err);
4936         }
4937         if (local_err) {
4938             error_reportf_err(local_err, "Failed to load snapshot: ");
4939             goto out;
4940         }
4941     }
4942 
4943     drv = bdrv_find_format(out_fmt);
4944     if (!drv) {
4945         error_report("Unknown file format '%s'", out_fmt);
4946         goto out;
4947     }
4948     if (!drv->create_opts) {
4949         error_report("Format driver '%s' does not support image creation",
4950                      drv->format_name);
4951         goto out;
4952     }
4953 
4954     create_opts = qemu_opts_append(create_opts, drv->create_opts);
4955     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
4956     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
4957     if (options) {
4958         qemu_opts_do_parse(opts, options, NULL, &local_err);
4959         if (local_err) {
4960             error_report_err(local_err);
4961             error_report("Invalid options for file format '%s'", out_fmt);
4962             goto out;
4963         }
4964     }
4965     if (img_size != UINT64_MAX) {
4966         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
4967     }
4968 
4969     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
4970     if (local_err) {
4971         error_report_err(local_err);
4972         goto out;
4973     }
4974 
4975     if (output_format == OFORMAT_HUMAN) {
4976         printf("required size: %" PRIu64 "\n", info->required);
4977         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
4978     } else {
4979         dump_json_block_measure_info(info);
4980     }
4981 
4982     ret = 0;
4983 
4984 out:
4985     qapi_free_BlockMeasureInfo(info);
4986     qemu_opts_del(object_opts);
4987     qemu_opts_del(opts);
4988     qemu_opts_del(sn_opts);
4989     qemu_opts_free(create_opts);
4990     g_free(options);
4991     blk_unref(in_blk);
4992     return ret;
4993 }
4994 
4995 static const img_cmd_t img_cmds[] = {
4996 #define DEF(option, callback, arg_string)        \
4997     { option, callback },
4998 #include "qemu-img-cmds.h"
4999 #undef DEF
5000     { NULL, NULL, },
5001 };
5002 
5003 int main(int argc, char **argv)
5004 {
5005     const img_cmd_t *cmd;
5006     const char *cmdname;
5007     Error *local_error = NULL;
5008     char *trace_file = NULL;
5009     int c;
5010     static const struct option long_options[] = {
5011         {"help", no_argument, 0, 'h'},
5012         {"version", no_argument, 0, 'V'},
5013         {"trace", required_argument, NULL, 'T'},
5014         {0, 0, 0, 0}
5015     };
5016 
5017 #ifdef CONFIG_POSIX
5018     signal(SIGPIPE, SIG_IGN);
5019 #endif
5020 
5021     error_init(argv[0]);
5022     module_call_init(MODULE_INIT_TRACE);
5023     qemu_init_exec_dir(argv[0]);
5024 
5025     if (qemu_init_main_loop(&local_error)) {
5026         error_report_err(local_error);
5027         exit(EXIT_FAILURE);
5028     }
5029 
5030     qcrypto_init(&error_fatal);
5031 
5032     module_call_init(MODULE_INIT_QOM);
5033     bdrv_init();
5034     if (argc < 2) {
5035         error_exit("Not enough arguments");
5036     }
5037 
5038     qemu_add_opts(&qemu_object_opts);
5039     qemu_add_opts(&qemu_source_opts);
5040     qemu_add_opts(&qemu_trace_opts);
5041 
5042     while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5043         switch (c) {
5044         case ':':
5045             missing_argument(argv[optind - 1]);
5046             return 0;
5047         case '?':
5048             unrecognized_option(argv[optind - 1]);
5049             return 0;
5050         case 'h':
5051             help();
5052             return 0;
5053         case 'V':
5054             printf(QEMU_IMG_VERSION);
5055             return 0;
5056         case 'T':
5057             g_free(trace_file);
5058             trace_file = trace_opt_parse(optarg);
5059             break;
5060         }
5061     }
5062 
5063     cmdname = argv[optind];
5064 
5065     /* reset getopt_long scanning */
5066     argc -= optind;
5067     if (argc < 1) {
5068         return 0;
5069     }
5070     argv += optind;
5071     qemu_reset_optind();
5072 
5073     if (!trace_init_backends()) {
5074         exit(1);
5075     }
5076     trace_init_file(trace_file);
5077     qemu_set_log(LOG_TRACE);
5078 
5079     /* find the command */
5080     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5081         if (!strcmp(cmdname, cmd->name)) {
5082             return cmd->handler(argc, argv);
5083         }
5084     }
5085 
5086     /* not found */
5087     error_exit("Command not found: %s", cmdname);
5088 }
5089