xref: /openbmc/qemu/qemu-img.c (revision 34aee9c94691f529cd952f9483a6b357ca098042)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu/help-texts.h"
29 #include "qemu/qemu-progress.h"
30 #include "qemu-version.h"
31 #include "qapi/error.h"
32 #include "qapi/qapi-commands-block-core.h"
33 #include "qapi/qapi-visit-block-core.h"
34 #include "qapi/qobject-output-visitor.h"
35 #include "qapi/qmp/qjson.h"
36 #include "qapi/qmp/qdict.h"
37 #include "qemu/cutils.h"
38 #include "qemu/config-file.h"
39 #include "qemu/option.h"
40 #include "qemu/error-report.h"
41 #include "qemu/log.h"
42 #include "qemu/main-loop.h"
43 #include "qemu/module.h"
44 #include "qemu/sockets.h"
45 #include "qemu/units.h"
46 #include "qemu/memalign.h"
47 #include "qom/object_interfaces.h"
48 #include "sysemu/block-backend.h"
49 #include "block/block_int.h"
50 #include "block/blockjob.h"
51 #include "block/dirty-bitmap.h"
52 #include "block/qapi.h"
53 #include "crypto/init.h"
54 #include "trace/control.h"
55 #include "qemu/throttle.h"
56 #include "block/throttle-groups.h"
57 
/*
 * Banner text: "qemu-img version " followed by QEMU_FULL_VERSION, a newline,
 * QEMU_COPYRIGHT and a final newline.  help() uses it as the first part of
 * its message.
 */
#define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
                          "\n" QEMU_COPYRIGHT "\n"
60 
/*
 * Pairs a command name with the function that handles it.  Handlers take
 * main()-style argc/argv arguments and return an int; img_create() and
 * img_check() have this signature.
 */
struct img_cmd_t {
    const char *name;                       /* command name */
    int (*handler)(int argc, char **argv);  /* function implementing it */
};
typedef struct img_cmd_t img_cmd_t;
65 
/*
 * Identifiers for options that only have a long form.  They are used as the
 * "val" member of getopt_long() option tables (see img_create() and
 * img_check()).  Numbering starts at 256, above any single-character option
 * code, and continues sequentially up to OPTION_SKIP_BROKEN == 277.
 *
 * Append new entries at the end so that existing values do not change.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN,
    OPTION_OBJECT,
    OPTION_IMAGE_OPTS,
    OPTION_PATTERN,
    OPTION_FLUSH_INTERVAL,
    OPTION_NO_DRAIN,
    OPTION_TARGET_IMAGE_OPTS,
    OPTION_SIZE,
    OPTION_PREALLOCATION,
    OPTION_SHRINK,
    OPTION_SALVAGE,
    OPTION_TARGET_IS_ZERO,
    OPTION_ADD,
    OPTION_REMOVE,
    OPTION_CLEAR,
    OPTION_ENABLE,
    OPTION_DISABLE,
    OPTION_MERGE,
    OPTION_BITMAPS,
    OPTION_FORCE,
    OPTION_SKIP_BROKEN,
};
90 
/*
 * Output style selected with --output: img_check() maps "json" to
 * OFORMAT_JSON and "human" to OFORMAT_HUMAN.
 */
enum OutputFormat {
    OFORMAT_JSON = 0,
    OFORMAT_HUMAN = 1,
};
typedef enum OutputFormat OutputFormat;
95 
/*
 * Default to cache=writeback as data integrity is not important for qemu-img.
 * img_check() starts from this mode and lets '-T' override it.
 */
#define BDRV_DEFAULT_CACHE "writeback"
98 
/*
 * Callback handed to bdrv_iterate_format() by help(): print one format name
 * on stdout, preceded by a single space.
 *
 * @opaque: unused
 * @name: format name to print
 */
static void format_print(void *opaque, const char *name)
{
    (void)opaque;

    printf(" %s", name);
}
103 
104 static G_NORETURN G_GNUC_PRINTF(1, 2)
105 void error_exit(const char *fmt, ...)
106 {
107     va_list ap;
108 
109     va_start(ap, fmt);
110     error_vreport(fmt, ap);
111     va_end(ap);
112 
113     error_printf("Try 'qemu-img --help' for more information\n");
114     exit(EXIT_FAILURE);
115 }
116 
117 static G_NORETURN
118 void missing_argument(const char *option)
119 {
120     error_exit("missing argument for option '%s'", option);
121 }
122 
123 static G_NORETURN
124 void unrecognized_option(const char *option)
125 {
126     error_exit("unrecognized option '%s'", option);
127 }
128 
129 /* Please keep in synch with docs/tools/qemu-img.rst */
130 static G_NORETURN
131 void help(void)
132 {
133     const char *help_msg =
134            QEMU_IMG_VERSION
135            "usage: qemu-img [standard options] command [command options]\n"
136            "QEMU disk image utility\n"
137            "\n"
138            "    '-h', '--help'       display this help and exit\n"
139            "    '-V', '--version'    output version information and exit\n"
140            "    '-T', '--trace'      [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
141            "                         specify tracing options\n"
142            "\n"
143            "Command syntax:\n"
144 #define DEF(option, callback, arg_string)        \
145            "  " arg_string "\n"
146 #include "qemu-img-cmds.h"
147 #undef DEF
148            "\n"
149            "Command parameters:\n"
150            "  'filename' is a disk image filename\n"
151            "  'objectdef' is a QEMU user creatable object definition. See the qemu(1)\n"
152            "    manual page for a description of the object properties. The most common\n"
153            "    object type is a 'secret', which is used to supply passwords and/or\n"
154            "    encryption keys.\n"
155            "  'fmt' is the disk image format. It is guessed automatically in most cases\n"
156            "  'cache' is the cache mode used to write the output disk image, the valid\n"
157            "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
158            "    'directsync' and 'unsafe' (default for convert)\n"
159            "  'src_cache' is the cache mode used to read input disk images, the valid\n"
160            "    options are the same as for the 'cache' option\n"
161            "  'size' is the disk image size in bytes. Optional suffixes\n"
162            "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
163            "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
164            "    supported. 'b' is ignored.\n"
165            "  'output_filename' is the destination disk image filename\n"
166            "  'output_fmt' is the destination format\n"
167            "  'options' is a comma separated list of format specific options in a\n"
168            "    name=value format. Use -o help for an overview of the options supported by\n"
169            "    the used format\n"
170            "  'snapshot_param' is param used for internal snapshot, format\n"
171            "    is 'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
172            "    '[ID_OR_NAME]'\n"
173            "  '-c' indicates that target image must be compressed (qcow format only)\n"
174            "  '-u' allows unsafe backing chains. For rebasing, it is assumed that old and\n"
175            "       new backing file match exactly. The image doesn't need a working\n"
176            "       backing file before rebasing in this case (useful for renaming the\n"
177            "       backing file). For image creation, allow creating without attempting\n"
178            "       to open the backing file.\n"
179            "  '-h' with or without a command shows this help and lists the supported formats\n"
180            "  '-p' show progress of command (only certain commands)\n"
181            "  '-q' use Quiet mode - do not print any output (except errors)\n"
182            "  '-S' indicates the consecutive number of bytes (defaults to 4k) that must\n"
183            "       contain only zeros for qemu-img to create a sparse image during\n"
184            "       conversion. If the number of bytes is 0, the source will not be scanned for\n"
185            "       unallocated or zero sectors, and the destination image will always be\n"
186            "       fully allocated\n"
187            "  '--output' takes the format in which the output must be done (human or json)\n"
188            "  '-n' skips the target volume creation (useful if the volume is created\n"
189            "       prior to running qemu-img)\n"
190            "\n"
191            "Parameters to bitmap subcommand:\n"
192            "  'bitmap' is the name of the bitmap to manipulate, through one or more\n"
193            "       actions from '--add', '--remove', '--clear', '--enable', '--disable',\n"
194            "       or '--merge source'\n"
195            "  '-g granularity' sets the granularity for '--add' actions\n"
196            "  '-b source' and '-F src_fmt' tell '--merge' actions to find the source\n"
197            "       bitmaps from an alternative file\n"
198            "\n"
199            "Parameters to check subcommand:\n"
200            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
201            "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
202            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
203            "       hiding corruption that has already occurred.\n"
204            "\n"
205            "Parameters to convert subcommand:\n"
206            "  '--bitmaps' copies all top-level persistent bitmaps to destination\n"
207            "  '-m' specifies how many coroutines work in parallel during the convert\n"
208            "       process (defaults to 8)\n"
209            "  '-W' allow to write to the target out of order rather than sequential\n"
210            "\n"
211            "Parameters to snapshot subcommand:\n"
212            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
213            "  '-a' applies a snapshot (revert disk to saved state)\n"
214            "  '-c' creates a snapshot\n"
215            "  '-d' deletes a snapshot\n"
216            "  '-l' lists all snapshots in the given image\n"
217            "\n"
218            "Parameters to compare subcommand:\n"
219            "  '-f' first image format\n"
220            "  '-F' second image format\n"
221            "  '-s' run in Strict mode - fail on different image size or sector allocation\n"
222            "\n"
223            "Parameters to dd subcommand:\n"
224            "  'bs=BYTES' read and write up to BYTES bytes at a time "
225            "(default: 512)\n"
226            "  'count=N' copy only N input blocks\n"
227            "  'if=FILE' read from FILE\n"
228            "  'of=FILE' write to FILE\n"
229            "  'skip=N' skip N bs-sized blocks at the start of input\n";
230 
231     printf("%s\nSupported formats:", help_msg);
232     bdrv_iterate_format(format_print, NULL, false);
233     printf("\n\n" QEMU_HELP_BOTTOM "\n");
234     exit(EXIT_SUCCESS);
235 }
236 
/*
 * Can @list be handed to accumulate_options()?
 *
 * accumulate_options() joins several lists with ',' separators, and a
 * doubled ',' acts as an escaped comma.  Joining is therefore only safe
 * when @list
 *  - is not empty (otherwise the separators before and after it would
 *    combine into an escaped comma),
 *  - does not start with ',' (it would escape the separator before it), and
 *  - does not end in an odd number of ',' (the last one would escape the
 *    separator after it).
 *
 * Returns true if @list satisfies all three conditions.
 */
static bool is_valid_option_list(const char *list)
{
    size_t end = strlen(list);
    size_t trailing_commas = 0;

    if (end == 0 || list[0] == ',') {
        return false;
    }

    /* Count the run of ',' at the end of the string */
    while (end > 0 && list[end - 1] == ',') {
        end--;
        trailing_commas++;
    }

    return trailing_commas % 2 == 0;
}
264 
/*
 * Append @list to the comma-separated option string in *@options.
 *
 * *@options is either NULL (nothing accumulated yet) or a g_malloc'ed
 * string; on success it is replaced by a newly allocated string and the old
 * one is freed.  @list itself is only read.
 *
 * Returns 0 on success.  If @list fails is_valid_option_list(), an error is
 * reported, *@options is left untouched and -1 is returned.
 */
static int accumulate_options(char **options, char *list)
{
    char *joined;

    if (!is_valid_option_list(list)) {
        error_report("Invalid option list: %s", list);
        return -1;
    }

    joined = *options ? g_strdup_printf("%s,%s", *options, list)
                      : g_strdup(list);

    /* g_free(NULL) is a no-op, so this also covers the first call */
    g_free(*options);
    *options = joined;

    return 0;
}
283 
/*
 * Option list named "source".
 * NOTE(review): presumably this is the list img_open() obtains through
 * qemu_find_opts("source") to parse --image-opts strings; its registration
 * is not visible here - confirm.
 */
static QemuOptsList qemu_source_opts = {
    .name = "source",
    /* NOTE(review): presumably a value given without a key is "file" */
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
    .desc = {
        /* No option descriptions are listed; only the empty terminator */
        { }
    },
};
292 
/*
 * printf() to stdout unless @quiet is set.
 *
 * @quiet: when true, nothing is printed
 * @fmt: printf-style format string, followed by its arguments
 *
 * Returns 0 when @quiet is true, otherwise the return value of vprintf().
 */
static int G_GNUC_PRINTF(2, 3) qprintf(bool quiet, const char *fmt, ...)
{
    va_list ap;
    int written;

    if (quiet) {
        return 0;
    }

    va_start(ap, fmt);
    written = vprintf(fmt, ap);
    va_end(ap);

    return written;
}
304 
305 
306 static int print_block_option_help(const char *filename, const char *fmt)
307 {
308     BlockDriver *drv, *proto_drv;
309     QemuOptsList *create_opts = NULL;
310     Error *local_err = NULL;
311 
312     /* Find driver and parse its options */
313     drv = bdrv_find_format(fmt);
314     if (!drv) {
315         error_report("Unknown file format '%s'", fmt);
316         return 1;
317     }
318 
319     if (!drv->create_opts) {
320         error_report("Format driver '%s' does not support image creation", fmt);
321         return 1;
322     }
323 
324     create_opts = qemu_opts_append(create_opts, drv->create_opts);
325     if (filename) {
326         proto_drv = bdrv_find_protocol(filename, true, &local_err);
327         if (!proto_drv) {
328             error_report_err(local_err);
329             qemu_opts_free(create_opts);
330             return 1;
331         }
332         if (!proto_drv->create_opts) {
333             error_report("Protocol driver '%s' does not support image creation",
334                          proto_drv->format_name);
335             qemu_opts_free(create_opts);
336             return 1;
337         }
338         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
339     }
340 
341     if (filename) {
342         printf("Supported options:\n");
343     } else {
344         printf("Supported %s options:\n", fmt);
345     }
346     qemu_opts_print_help(create_opts, false);
347     qemu_opts_free(create_opts);
348 
349     if (!filename) {
350         printf("\n"
351                "The protocol level may support further options.\n"
352                "Specify the target filename to include those options.\n");
353     }
354 
355     return 0;
356 }
357 
358 
359 static BlockBackend *img_open_opts(const char *optstr,
360                                    QemuOpts *opts, int flags, bool writethrough,
361                                    bool quiet, bool force_share)
362 {
363     QDict *options;
364     Error *local_err = NULL;
365     BlockBackend *blk;
366     options = qemu_opts_to_qdict(opts, NULL);
367     if (force_share) {
368         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
369             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
370             error_report("--force-share/-U conflicts with image options");
371             qobject_unref(options);
372             return NULL;
373         }
374         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
375     }
376     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
377     if (!blk) {
378         error_reportf_err(local_err, "Could not open '%s': ", optstr);
379         return NULL;
380     }
381     blk_set_enable_write_cache(blk, !writethrough);
382 
383     return blk;
384 }
385 
386 static BlockBackend *img_open_file(const char *filename,
387                                    QDict *options,
388                                    const char *fmt, int flags,
389                                    bool writethrough, bool quiet,
390                                    bool force_share)
391 {
392     BlockBackend *blk;
393     Error *local_err = NULL;
394 
395     if (!options) {
396         options = qdict_new();
397     }
398     if (fmt) {
399         qdict_put_str(options, "driver", fmt);
400     }
401 
402     if (force_share) {
403         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
404     }
405     blk = blk_new_open(filename, NULL, options, flags, &local_err);
406     if (!blk) {
407         error_reportf_err(local_err, "Could not open '%s': ", filename);
408         return NULL;
409     }
410     blk_set_enable_write_cache(blk, !writethrough);
411 
412     return blk;
413 }
414 
415 
416 static int img_add_key_secrets(void *opaque,
417                                const char *name, const char *value,
418                                Error **errp)
419 {
420     QDict *options = opaque;
421 
422     if (g_str_has_suffix(name, "key-secret")) {
423         qdict_put_str(options, name, value);
424     }
425 
426     return 0;
427 }
428 
429 
430 static BlockBackend *img_open(bool image_opts,
431                               const char *filename,
432                               const char *fmt, int flags, bool writethrough,
433                               bool quiet, bool force_share)
434 {
435     BlockBackend *blk;
436     if (image_opts) {
437         QemuOpts *opts;
438         if (fmt) {
439             error_report("--image-opts and --format are mutually exclusive");
440             return NULL;
441         }
442         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
443                                        filename, true);
444         if (!opts) {
445             return NULL;
446         }
447         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
448                             force_share);
449     } else {
450         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
451                             force_share);
452     }
453 
454     if (blk) {
455         blk_set_force_allow_inactivate(blk);
456     }
457 
458     return blk;
459 }
460 
461 
462 static int add_old_style_options(const char *fmt, QemuOpts *opts,
463                                  const char *base_filename,
464                                  const char *base_fmt)
465 {
466     if (base_filename) {
467         if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
468                           NULL)) {
469             error_report("Backing file not supported for file format '%s'",
470                          fmt);
471             return -1;
472         }
473     }
474     if (base_fmt) {
475         if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
476             error_report("Backing file format not supported for file "
477                          "format '%s'", fmt);
478             return -1;
479         }
480     }
481     return 0;
482 }
483 
484 static int64_t cvtnum_full(const char *name, const char *value, int64_t min,
485                            int64_t max)
486 {
487     int err;
488     uint64_t res;
489 
490     err = qemu_strtosz(value, NULL, &res);
491     if (err < 0 && err != -ERANGE) {
492         error_report("Invalid %s specified. You may use "
493                      "k, M, G, T, P or E suffixes for", name);
494         error_report("kilobytes, megabytes, gigabytes, terabytes, "
495                      "petabytes and exabytes.");
496         return err;
497     }
498     if (err == -ERANGE || res > max || res < min) {
499         error_report("Invalid %s specified. Must be between %" PRId64
500                      " and %" PRId64 ".", name, min, max);
501         return -ERANGE;
502     }
503     return res;
504 }
505 
/*
 * Parse @value as a size in the range [0, INT64_MAX].
 *
 * @name: description of the value, used in error messages
 * @value: string to parse
 *
 * Returns the size, or a negative errno after reporting an error; see
 * cvtnum_full().
 */
static int64_t cvtnum(const char *name, const char *value)
{
    const int64_t lowest = 0;
    const int64_t highest = INT64_MAX;

    return cvtnum_full(name, value, lowest, highest);
}
510 
511 static int img_create(int argc, char **argv)
512 {
513     int c;
514     uint64_t img_size = -1;
515     const char *fmt = "raw";
516     const char *base_fmt = NULL;
517     const char *filename;
518     const char *base_filename = NULL;
519     char *options = NULL;
520     Error *local_err = NULL;
521     bool quiet = false;
522     int flags = 0;
523 
524     for(;;) {
525         static const struct option long_options[] = {
526             {"help", no_argument, 0, 'h'},
527             {"object", required_argument, 0, OPTION_OBJECT},
528             {0, 0, 0, 0}
529         };
530         c = getopt_long(argc, argv, ":F:b:f:ho:qu",
531                         long_options, NULL);
532         if (c == -1) {
533             break;
534         }
535         switch(c) {
536         case ':':
537             missing_argument(argv[optind - 1]);
538             break;
539         case '?':
540             unrecognized_option(argv[optind - 1]);
541             break;
542         case 'h':
543             help();
544             break;
545         case 'F':
546             base_fmt = optarg;
547             break;
548         case 'b':
549             base_filename = optarg;
550             break;
551         case 'f':
552             fmt = optarg;
553             break;
554         case 'o':
555             if (accumulate_options(&options, optarg) < 0) {
556                 goto fail;
557             }
558             break;
559         case 'q':
560             quiet = true;
561             break;
562         case 'u':
563             flags |= BDRV_O_NO_BACKING;
564             break;
565         case OPTION_OBJECT:
566             user_creatable_process_cmdline(optarg);
567             break;
568         }
569     }
570 
571     /* Get the filename */
572     filename = (optind < argc) ? argv[optind] : NULL;
573     if (options && has_help_option(options)) {
574         g_free(options);
575         return print_block_option_help(filename, fmt);
576     }
577 
578     if (optind >= argc) {
579         error_exit("Expecting image file name");
580     }
581     optind++;
582 
583     /* Get image size, if specified */
584     if (optind < argc) {
585         int64_t sval;
586 
587         sval = cvtnum("image size", argv[optind++]);
588         if (sval < 0) {
589             goto fail;
590         }
591         img_size = (uint64_t)sval;
592     }
593     if (optind != argc) {
594         error_exit("Unexpected argument: %s", argv[optind]);
595     }
596 
597     bdrv_img_create(filename, fmt, base_filename, base_fmt,
598                     options, img_size, flags, quiet, &local_err);
599     if (local_err) {
600         error_reportf_err(local_err, "%s: ", filename);
601         goto fail;
602     }
603 
604     g_free(options);
605     return 0;
606 
607 fail:
608     g_free(options);
609     return 1;
610 }
611 
612 static void dump_json_image_check(ImageCheck *check, bool quiet)
613 {
614     GString *str;
615     QObject *obj;
616     Visitor *v = qobject_output_visitor_new(&obj);
617 
618     visit_type_ImageCheck(v, NULL, &check, &error_abort);
619     visit_complete(v, &obj);
620     str = qobject_to_json_pretty(obj, true);
621     assert(str != NULL);
622     qprintf(quiet, "%s\n", str->str);
623     qobject_unref(obj);
624     visit_free(v);
625     g_string_free(str, true);
626 }
627 
628 static void dump_human_image_check(ImageCheck *check, bool quiet)
629 {
630     if (!(check->corruptions || check->leaks || check->check_errors)) {
631         qprintf(quiet, "No errors were found on the image.\n");
632     } else {
633         if (check->corruptions) {
634             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
635                     "Data may be corrupted, or further writes to the image "
636                     "may corrupt it.\n",
637                     check->corruptions);
638         }
639 
640         if (check->leaks) {
641             qprintf(quiet,
642                     "\n%" PRId64 " leaked clusters were found on the image.\n"
643                     "This means waste of disk space, but no harm to data.\n",
644                     check->leaks);
645         }
646 
647         if (check->check_errors) {
648             qprintf(quiet,
649                     "\n%" PRId64
650                     " internal errors have occurred during the check.\n",
651                     check->check_errors);
652         }
653     }
654 
655     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
656         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
657                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
658                 check->allocated_clusters, check->total_clusters,
659                 check->allocated_clusters * 100.0 / check->total_clusters,
660                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
661                 check->compressed_clusters * 100.0 /
662                 check->allocated_clusters);
663     }
664 
665     if (check->image_end_offset) {
666         qprintf(quiet,
667                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
668     }
669 }
670 
671 static int collect_image_check(BlockDriverState *bs,
672                    ImageCheck *check,
673                    const char *filename,
674                    const char *fmt,
675                    int fix)
676 {
677     int ret;
678     BdrvCheckResult result;
679 
680     ret = bdrv_check(bs, &result, fix);
681     if (ret < 0) {
682         return ret;
683     }
684 
685     check->filename                 = g_strdup(filename);
686     check->format                   = g_strdup(bdrv_get_format_name(bs));
687     check->check_errors             = result.check_errors;
688     check->corruptions              = result.corruptions;
689     check->has_corruptions          = result.corruptions != 0;
690     check->leaks                    = result.leaks;
691     check->has_leaks                = result.leaks != 0;
692     check->corruptions_fixed        = result.corruptions_fixed;
693     check->has_corruptions_fixed    = result.corruptions_fixed != 0;
694     check->leaks_fixed              = result.leaks_fixed;
695     check->has_leaks_fixed          = result.leaks_fixed != 0;
696     check->image_end_offset         = result.image_end_offset;
697     check->has_image_end_offset     = result.image_end_offset != 0;
698     check->total_clusters           = result.bfi.total_clusters;
699     check->has_total_clusters       = result.bfi.total_clusters != 0;
700     check->allocated_clusters       = result.bfi.allocated_clusters;
701     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
702     check->fragmented_clusters      = result.bfi.fragmented_clusters;
703     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
704     check->compressed_clusters      = result.bfi.compressed_clusters;
705     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
706 
707     return 0;
708 }
709 
710 /*
711  * Checks an image for consistency. Exit codes:
712  *
713  *  0 - Check completed, image is good
714  *  1 - Check not completed because of internal errors
715  *  2 - Check completed, image is corrupted
716  *  3 - Check completed, image has leaked clusters, but is good otherwise
717  * 63 - Checks are not supported by the image format
718  */
719 static int img_check(int argc, char **argv)
720 {
721     int c, ret;
722     OutputFormat output_format = OFORMAT_HUMAN;
723     const char *filename, *fmt, *output, *cache;
724     BlockBackend *blk;
725     BlockDriverState *bs;
726     int fix = 0;
727     int flags = BDRV_O_CHECK;
728     bool writethrough;
729     ImageCheck *check;
730     bool quiet = false;
731     bool image_opts = false;
732     bool force_share = false;
733 
734     fmt = NULL;
735     output = NULL;
736     cache = BDRV_DEFAULT_CACHE;
737 
738     for(;;) {
739         int option_index = 0;
740         static const struct option long_options[] = {
741             {"help", no_argument, 0, 'h'},
742             {"format", required_argument, 0, 'f'},
743             {"repair", required_argument, 0, 'r'},
744             {"output", required_argument, 0, OPTION_OUTPUT},
745             {"object", required_argument, 0, OPTION_OBJECT},
746             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
747             {"force-share", no_argument, 0, 'U'},
748             {0, 0, 0, 0}
749         };
750         c = getopt_long(argc, argv, ":hf:r:T:qU",
751                         long_options, &option_index);
752         if (c == -1) {
753             break;
754         }
755         switch(c) {
756         case ':':
757             missing_argument(argv[optind - 1]);
758             break;
759         case '?':
760             unrecognized_option(argv[optind - 1]);
761             break;
762         case 'h':
763             help();
764             break;
765         case 'f':
766             fmt = optarg;
767             break;
768         case 'r':
769             flags |= BDRV_O_RDWR;
770 
771             if (!strcmp(optarg, "leaks")) {
772                 fix = BDRV_FIX_LEAKS;
773             } else if (!strcmp(optarg, "all")) {
774                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
775             } else {
776                 error_exit("Unknown option value for -r "
777                            "(expecting 'leaks' or 'all'): %s", optarg);
778             }
779             break;
780         case OPTION_OUTPUT:
781             output = optarg;
782             break;
783         case 'T':
784             cache = optarg;
785             break;
786         case 'q':
787             quiet = true;
788             break;
789         case 'U':
790             force_share = true;
791             break;
792         case OPTION_OBJECT:
793             user_creatable_process_cmdline(optarg);
794             break;
795         case OPTION_IMAGE_OPTS:
796             image_opts = true;
797             break;
798         }
799     }
800     if (optind != argc - 1) {
801         error_exit("Expecting one image file name");
802     }
803     filename = argv[optind++];
804 
805     if (output && !strcmp(output, "json")) {
806         output_format = OFORMAT_JSON;
807     } else if (output && !strcmp(output, "human")) {
808         output_format = OFORMAT_HUMAN;
809     } else if (output) {
810         error_report("--output must be used with human or json as argument.");
811         return 1;
812     }
813 
814     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
815     if (ret < 0) {
816         error_report("Invalid source cache option: %s", cache);
817         return 1;
818     }
819 
820     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
821                    force_share);
822     if (!blk) {
823         return 1;
824     }
825     bs = blk_bs(blk);
826 
827     check = g_new0(ImageCheck, 1);
828     ret = collect_image_check(bs, check, filename, fmt, fix);
829 
830     if (ret == -ENOTSUP) {
831         error_report("This image format does not support checks");
832         ret = 63;
833         goto fail;
834     }
835 
836     if (check->corruptions_fixed || check->leaks_fixed) {
837         int corruptions_fixed, leaks_fixed;
838         bool has_leaks_fixed, has_corruptions_fixed;
839 
840         leaks_fixed         = check->leaks_fixed;
841         has_leaks_fixed     = check->has_leaks_fixed;
842         corruptions_fixed   = check->corruptions_fixed;
843         has_corruptions_fixed = check->has_corruptions_fixed;
844 
845         if (output_format == OFORMAT_HUMAN) {
846             qprintf(quiet,
847                     "The following inconsistencies were found and repaired:\n\n"
848                     "    %" PRId64 " leaked clusters\n"
849                     "    %" PRId64 " corruptions\n\n"
850                     "Double checking the fixed image now...\n",
851                     check->leaks_fixed,
852                     check->corruptions_fixed);
853         }
854 
855         qapi_free_ImageCheck(check);
856         check = g_new0(ImageCheck, 1);
857         ret = collect_image_check(bs, check, filename, fmt, 0);
858 
859         check->leaks_fixed          = leaks_fixed;
860         check->has_leaks_fixed      = has_leaks_fixed;
861         check->corruptions_fixed    = corruptions_fixed;
862         check->has_corruptions_fixed = has_corruptions_fixed;
863     }
864 
865     if (!ret) {
866         switch (output_format) {
867         case OFORMAT_HUMAN:
868             dump_human_image_check(check, quiet);
869             break;
870         case OFORMAT_JSON:
871             dump_json_image_check(check, quiet);
872             break;
873         }
874     }
875 
876     if (ret || check->check_errors) {
877         if (ret) {
878             error_report("Check failed: %s", strerror(-ret));
879         } else {
880             error_report("Check failed");
881         }
882         ret = 1;
883         goto fail;
884     }
885 
886     if (check->corruptions) {
887         ret = 2;
888     } else if (check->leaks) {
889         ret = 3;
890     } else {
891         ret = 0;
892     }
893 
894 fail:
895     qapi_free_ImageCheck(check);
896     blk_unref(blk);
897     return ret;
898 }
899 
/*
 * Context handed to common_block_job_cb() through its opaque pointer when a
 * block job is started.
 */
900 typedef struct CommonBlockJobCBInfo {
901     BlockDriverState *bs;   /* filled in by the caller; not read by common_block_job_cb() */
902     Error **errp;           /* where the callback stores the error when the job fails */
903 } CommonBlockJobCBInfo;
904 
905 static void common_block_job_cb(void *opaque, int ret)
906 {
907     CommonBlockJobCBInfo *cbi = opaque;
908 
909     if (ret < 0) {
910         error_setg_errno(cbi->errp, -ret, "Block job failed");
911     }
912 }
913 
914 static void run_block_job(BlockJob *job, Error **errp)
915 {
916     uint64_t progress_current, progress_total;
917     AioContext *aio_context = block_job_get_aio_context(job);
918     int ret = 0;
919 
920     job_lock();
921     job_ref_locked(&job->job);
922     do {
923         float progress = 0.0f;
924         job_unlock();
925         aio_poll(aio_context, true);
926 
927         progress_get_snapshot(&job->job.progress, &progress_current,
928                               &progress_total);
929         if (progress_total) {
930             progress = (float)progress_current / progress_total * 100.f;
931         }
932         qemu_progress_print(progress, 0);
933         job_lock();
934     } while (!job_is_ready_locked(&job->job) &&
935              !job_is_completed_locked(&job->job));
936 
937     if (!job_is_completed_locked(&job->job)) {
938         ret = job_complete_sync_locked(&job->job, errp);
939     } else {
940         ret = job->job.ret;
941     }
942     job_unref_locked(&job->job);
943     job_unlock();
944 
945     /* publish completion progress only when success */
946     if (!ret) {
947         qemu_progress_print(100.f, 0);
948     }
949 }
950 
/*
 * 'qemu-img commit': commit the image named on the command line into an
 * image of its backing chain by running a block job created with
 * commit_active_start() to completion.
 *
 * Options handled here: -f FMT, -t CACHE, -b BASE (implies -d), -d, -p, -q,
 * -r RATE_LIMIT, --object and --image-opts.
 *
 * Unless -d/-b was given, the top image is emptied with blk_make_empty()
 * after a successful commit (an -ENOTSUP result from that step is ignored).
 *
 * Returns 0 on success and 1 on any error.
 */
951 static int img_commit(int argc, char **argv)
952 {
953     int c, ret, flags;
954     const char *filename, *fmt, *cache, *base;
955     BlockBackend *blk;
956     BlockDriverState *bs, *base_bs;
957     BlockJob *job;
958     bool progress = false, quiet = false, drop = false;
959     bool writethrough;
960     Error *local_err = NULL;
961     CommonBlockJobCBInfo cbi;
962     bool image_opts = false;
963     AioContext *aio_context;
964     int64_t rate_limit = 0;
965 
966     fmt = NULL;
967     cache = BDRV_DEFAULT_CACHE;
968     base = NULL;
969     for(;;) {
970         static const struct option long_options[] = {
971             {"help", no_argument, 0, 'h'},
972             {"object", required_argument, 0, OPTION_OBJECT},
973             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
974             {0, 0, 0, 0}
975         };
976         c = getopt_long(argc, argv, ":f:ht:b:dpqr:",
977                         long_options, NULL);
978         if (c == -1) {
979             break;
980         }
981         switch(c) {
982         case ':':
983             missing_argument(argv[optind - 1]);
984             break;
985         case '?':
986             unrecognized_option(argv[optind - 1]);
987             break;
988         case 'h':
989             help();
990             break;
991         case 'f':
992             fmt = optarg;
993             break;
994         case 't':
995             cache = optarg;
996             break;
997         case 'b':
998             base = optarg;
999             /* -b implies -d */
1000             drop = true;
1001             break;
1002         case 'd':
1003             drop = true;
1004             break;
1005         case 'p':
1006             progress = true;
1007             break;
1008         case 'q':
1009             quiet = true;
1010             break;
1011         case 'r':
1012             rate_limit = cvtnum("rate limit", optarg);
1013             if (rate_limit < 0) {
1014                 return 1;
1015             }
1016             break;
1017         case OPTION_OBJECT:
1018             user_creatable_process_cmdline(optarg);
1019             break;
1020         case OPTION_IMAGE_OPTS:
1021             image_opts = true;
1022             break;
1023         }
1024     }
1025 
1026     /* Progress is not shown in Quiet mode */
1027     if (quiet) {
1028         progress = false;
1029     }
1030 
1031     if (optind != argc - 1) {
1032         error_exit("Expecting one image file name");
1033     }
1034     filename = argv[optind++];
1035 
1036     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
1037     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1038     if (ret < 0) {
1039         error_report("Invalid cache option: %s", cache);
1040         return 1;
1041     }
1042 
1043     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1044                    false);
1045     if (!blk) {
1046         return 1;
1047     }
1048     bs = blk_bs(blk);
1049 
1050     qemu_progress_init(progress, 1.f);
1051     qemu_progress_print(0.f, 100);
1052 
     /*
      * Pick the commit target while holding the graph read lock; every exit
      * from this section drops the lock again before leaving it.
      */
1053     bdrv_graph_rdlock_main_loop();
1054     if (base) {
1055         base_bs = bdrv_find_backing_image(bs, base);
1056         if (!base_bs) {
1057             error_setg(&local_err,
1058                        "Did not find '%s' in the backing chain of '%s'",
1059                        base, filename);
1060             bdrv_graph_rdunlock_main_loop();
1061             goto done;
1062         }
1063     } else {
1064         /* This is different from QMP, which by default uses the deepest file in
1065          * the backing chain (i.e., the very base); however, the traditional
1066          * behavior of qemu-img commit is using the immediate backing file. */
1067         base_bs = bdrv_backing_chain_next(bs);
1068         if (!base_bs) {
1069             error_setg(&local_err, "Image does not have a backing file");
1070             bdrv_graph_rdunlock_main_loop();
1071             goto done;
1072         }
1073     }
1074     bdrv_graph_rdunlock_main_loop();
1075 
     /* common_block_job_cb() reports a failed job through local_err */
1076     cbi = (CommonBlockJobCBInfo){
1077         .errp = &local_err,
1078         .bs   = bs,
1079     };
1080 
1081     aio_context = bdrv_get_aio_context(bs);
1082     aio_context_acquire(aio_context);
1083     commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit,
1084                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1085                         &cbi, false, &local_err);
1086     aio_context_release(aio_context);
1087     if (local_err) {
1088         goto done;
1089     }
1090 
1091     /* When the block job completes, the BlockBackend reference will point to
1092      * the old backing file. In order to avoid that the top image is already
1093      * deleted, so we can still empty it afterwards, increment the reference
1094      * counter here preemptively. */
1095     if (!drop) {
1096         bdrv_ref(bs);
1097     }
1098 
     /* Look the job up by the ID it was started with above */
1099     job = block_job_get("commit");
1100     assert(job);
1101     run_block_job(job, &local_err);
1102     if (local_err) {
1103         goto unref_backing;
1104     }
1105 
1106     if (!drop) {
1107         BlockBackend *old_backing_blk;
1108 
         /* Temporary BlockBackend on the top image, used only to empty it */
1109         old_backing_blk = blk_new_with_bs(bs, BLK_PERM_WRITE, BLK_PERM_ALL,
1110                                           &local_err);
1111         if (!old_backing_blk) {
1112             goto unref_backing;
1113         }
1114         ret = blk_make_empty(old_backing_blk, &local_err);
1115         blk_unref(old_backing_blk);
         /* An -ENOTSUP result is not treated as a failure: drop its error */
1116         if (ret == -ENOTSUP) {
1117             error_free(local_err);
1118             local_err = NULL;
1119         } else if (ret < 0) {
1120             goto unref_backing;
1121         }
1122     }
1123 
     /* Drops the extra reference taken before the job was run */
1124 unref_backing:
1125     if (!drop) {
1126         bdrv_unref(bs);
1127     }
1128 
1129 done:
1130     qemu_progress_end();
1131 
1132     /*
1133      * Manually inactivate the image first because this way we can know whether
1134      * an error occurred. blk_unref() doesn't tell us about failures.
1135      */
1136     ret = bdrv_inactivate_all();
     /* Keep an earlier error, if any, rather than replacing it */
1137     if (ret < 0 && !local_err) {
1138         error_setg_errno(&local_err, -ret, "Error while closing the image");
1139     }
1140     blk_unref(blk);
1141 
1142     if (local_err) {
1143         error_report_err(local_err);
1144         return 1;
1145     }
1146 
1147     qprintf(quiet, "Image committed.\n");
1148     return 0;
1149 }
1150 
1151 /*
1152  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1153  * of the first sector boundary within buf where the sector contains a
1154  * non-zero byte.  This function is robust to a buffer that is not
1155  * sector-aligned.
1156  */
1157 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1158 {
1159     int64_t i;
1160     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1161 
1162     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1163         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1164             return i;
1165         }
1166     }
1167     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1168         return i;
1169     }
1170     return -1;
1171 }
1172 
1173 /*
1174  * Returns true iff the first sector pointed to by 'buf' contains at least
1175  * a non-NUL byte.
1176  *
1177  * 'pnum' is set to the number of sectors (including and immediately following
1178  * the first one) that are known to be in the same allocated/unallocated state.
1179  * The function will try to align the end offset to alignment boundaries so
1180  * that the request will at least end aligned and consecutive requests will
1181  * also start at an aligned offset.
1182  */
1183 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1184                                 int64_t sector_num, int alignment)
1185 {
1186     bool is_zero;
1187     int i, tail;
1188 
1189     if (n <= 0) {
1190         *pnum = 0;
1191         return 0;
1192     }
1193     is_zero = buffer_is_zero(buf, BDRV_SECTOR_SIZE);
1194     for(i = 1; i < n; i++) {
1195         buf += BDRV_SECTOR_SIZE;
1196         if (is_zero != buffer_is_zero(buf, BDRV_SECTOR_SIZE)) {
1197             break;
1198         }
1199     }
1200 
1201     if (i == n) {
1202         /*
1203          * The whole buf is the same.
1204          * No reason to split it into chunks, so return now.
1205          */
1206         *pnum = i;
1207         return !is_zero;
1208     }
1209 
1210     tail = (sector_num + i) & (alignment - 1);
1211     if (tail) {
1212         if (is_zero && i <= tail) {
1213             /*
1214              * For sure next sector after i is data, and it will rewrite this
1215              * tail anyway due to RMW. So, let's just write data now.
1216              */
1217             is_zero = false;
1218         }
1219         if (!is_zero) {
1220             /* If possible, align up end offset of allocated areas. */
1221             i += alignment - tail;
1222             i = MIN(i, n);
1223         } else {
1224             /*
1225              * For sure next sector after i is data, and it will rewrite this
1226              * tail anyway due to RMW. Better is avoid RMW and write zeroes up
1227              * to aligned bound.
1228              */
1229             i -= tail;
1230         }
1231     }
1232     *pnum = i;
1233     return !is_zero;
1234 }
1235 
1236 /*
1237  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1238  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1239  * breaking up write requests for only small sparse areas.
1240  */
1241 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1242     int min, int64_t sector_num, int alignment)
1243 {
1244     int ret;
1245     int num_checked, num_used;
1246 
1247     if (n < min) {
1248         min = n;
1249     }
1250 
1251     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1252     if (!ret) {
1253         return ret;
1254     }
1255 
1256     num_used = *pnum;
1257     buf += BDRV_SECTOR_SIZE * *pnum;
1258     n -= *pnum;
1259     sector_num += *pnum;
1260     num_checked = num_used;
1261 
1262     while (n > 0) {
1263         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1264 
1265         buf += BDRV_SECTOR_SIZE * *pnum;
1266         n -= *pnum;
1267         sector_num += *pnum;
1268         num_checked += *pnum;
1269         if (ret) {
1270             num_used = num_checked;
1271         } else if (*pnum >= min) {
1272             break;
1273         }
1274     }
1275 
1276     *pnum = num_used;
1277     return 1;
1278 }
1279 
1280 /*
1281  * Compares two buffers chunk by chunk, where @chsize is the chunk size.
1282  * If @chsize is 0, default chunk size of BDRV_SECTOR_SIZE is used.
1283  * Returns 0 if the first chunk of each buffer matches, non-zero otherwise.
1284  *
1285  * @pnum is set to the size of the buffer prefix aligned to @chsize that
1286  * has the same matching status as the first chunk.
1287  */
1288 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1289                            int64_t bytes, uint64_t chsize, int64_t *pnum)
1290 {
1291     bool res;
1292     int64_t i;
1293 
1294     assert(bytes > 0);
1295 
1296     if (!chsize) {
1297         chsize = BDRV_SECTOR_SIZE;
1298     }
1299     i = MIN(bytes, chsize);
1300 
1301     res = !!memcmp(buf1, buf2, i);
1302     while (i < bytes) {
1303         int64_t len = MIN(bytes - i, chsize);
1304 
1305         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1306             break;
1307         }
1308         i += len;
1309     }
1310 
1311     *pnum = i;
1312     return res;
1313 }
1314 
/* Size in bytes of an I/O buffer; img_compare() never reads more than this at once */
1315 #define IO_BUF_SIZE (2 * MiB)
1316 
1317 /*
1318  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1319  *
1320  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1321  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1322  * failure), and 4 on error (the exit status for read errors), after emitting
1323  * an error message.
1324  *
1325  * @param blk:  BlockBackend for the image
1326  * @param offset: Starting offset to check
1327  * @param bytes: Number of bytes to check
1328  * @param filename: Name of disk file we are checking (logging purpose)
1329  * @param buffer: Allocated buffer for storing read data
1330  * @param quiet: Flag for quiet mode
1331  */
1332 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1333                                int64_t bytes, const char *filename,
1334                                uint8_t *buffer, bool quiet)
1335 {
1336     int ret = 0;
1337     int64_t idx;
1338 
1339     ret = blk_pread(blk, offset, bytes, buffer, 0);
1340     if (ret < 0) {
1341         error_report("Error while reading offset %" PRId64 " of %s: %s",
1342                      offset, filename, strerror(-ret));
1343         return 4;
1344     }
1345     idx = find_nonzero(buffer, bytes);
1346     if (idx >= 0) {
1347         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1348                 offset + idx);
1349         return 1;
1350     }
1351 
1352     return 0;
1353 }
1354 
1355 /*
1356  * Compares two images. Exit codes:
1357  *
1358  * 0 - Images are identical or the requested help was printed
1359  * 1 - Images differ
1360  * >1 - Error occurred
 *
 * The error codes used in this function are: 2 for an invalid cache or
 * --object option and for an image that cannot be opened, 3 when querying the
 * block status fails, and 4 when an image size cannot be determined or a read
 * fails.
 *
 * Options handled here: -f FMT1, -F FMT2, -T CACHE, -p, -q, -s (strict),
 * -U/--force-share, --object and --image-opts.
 *
 * Without -s, images of different size still count as identical if the part
 * of the larger image beyond the end of the smaller one reads as zeroes.
 * With -s, a size difference or a block status difference is a mismatch.
1361  */
1362 static int img_compare(int argc, char **argv)
1363 {
1364     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1365     BlockBackend *blk1, *blk2;
1366     BlockDriverState *bs1, *bs2;
1367     int64_t total_size1, total_size2;
1368     uint8_t *buf1 = NULL, *buf2 = NULL;
1369     int64_t pnum1, pnum2;
1370     int allocated1, allocated2;
1371     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1372     bool progress = false, quiet = false, strict = false;
1373     int flags;
1374     bool writethrough;
1375     int64_t total_size;
1376     int64_t offset = 0;
1377     int64_t chunk;
1378     int c;
1379     uint64_t progress_base;
1380     bool image_opts = false;
1381     bool force_share = false;
1382 
1383     cache = BDRV_DEFAULT_CACHE;
1384     for (;;) {
1385         static const struct option long_options[] = {
1386             {"help", no_argument, 0, 'h'},
1387             {"object", required_argument, 0, OPTION_OBJECT},
1388             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1389             {"force-share", no_argument, 0, 'U'},
1390             {0, 0, 0, 0}
1391         };
1392         c = getopt_long(argc, argv, ":hf:F:T:pqsU",
1393                         long_options, NULL);
1394         if (c == -1) {
1395             break;
1396         }
1397         switch (c) {
1398         case ':':
1399             missing_argument(argv[optind - 1]);
1400             break;
1401         case '?':
1402             unrecognized_option(argv[optind - 1]);
1403             break;
1404         case 'h':
1405             help();
1406             break;
1407         case 'f':
1408             fmt1 = optarg;
1409             break;
1410         case 'F':
1411             fmt2 = optarg;
1412             break;
1413         case 'T':
1414             cache = optarg;
1415             break;
1416         case 'p':
1417             progress = true;
1418             break;
1419         case 'q':
1420             quiet = true;
1421             break;
1422         case 's':
1423             strict = true;
1424             break;
1425         case 'U':
1426             force_share = true;
1427             break;
1428         case OPTION_OBJECT:
1429             {
1430                 Error *local_err = NULL;
1431 
                 /*
                  * Exit status 2 rather than 1 on failure: for this command
                  * 1 means "images differ".
                  */
1432                 if (!user_creatable_add_from_str(optarg, &local_err)) {
1433                     if (local_err) {
1434                         error_report_err(local_err);
1435                         exit(2);
1436                     } else {
1437                         /* Help was printed */
1438                         exit(EXIT_SUCCESS);
1439                     }
1440                 }
1441                 break;
1442             }
1443         case OPTION_IMAGE_OPTS:
1444             image_opts = true;
1445             break;
1446         }
1447     }
1448 
1449     /* Progress is not shown in Quiet mode */
1450     if (quiet) {
1451         progress = false;
1452     }
1453 
1454 
1455     if (optind != argc - 2) {
1456         error_exit("Expecting two image file names");
1457     }
1458     filename1 = argv[optind++];
1459     filename2 = argv[optind++];
1460 
1461     /* Initialize before goto out */
1462     qemu_progress_init(progress, 2.0);
1463 
1464     flags = 0;
1465     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1466     if (ret < 0) {
1467         error_report("Invalid source cache option: %s", cache);
1468         ret = 2;
1469         goto out3;
1470     }
1471 
1472     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
1473                     force_share);
1474     if (!blk1) {
1475         ret = 2;
1476         goto out3;
1477     }
1478 
1479     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
1480                     force_share);
1481     if (!blk2) {
1482         ret = 2;
1483         goto out2;
1484     }
1485     bs1 = blk_bs(blk1);
1486     bs2 = blk_bs(blk2);
1487 
1488     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1489     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1490     total_size1 = blk_getlength(blk1);
1491     if (total_size1 < 0) {
1492         error_report("Can't get size of %s: %s",
1493                      filename1, strerror(-total_size1));
1494         ret = 4;
1495         goto out;
1496     }
1497     total_size2 = blk_getlength(blk2);
1498     if (total_size2 < 0) {
1499         error_report("Can't get size of %s: %s",
1500                      filename2, strerror(-total_size2));
1501         ret = 4;
1502         goto out;
1503     }
     /*
      * total_size is the range present in both images, compared by the first
      * loop below. progress_base is the size of the larger image: it is the
      * denominator for progress reporting and the end of the tail scan.
      */
1504     total_size = MIN(total_size1, total_size2);
1505     progress_base = MAX(total_size1, total_size2);
1506 
1507     qemu_progress_print(0, 100);
1508 
1509     if (strict && total_size1 != total_size2) {
1510         ret = 1;
1511         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1512         goto out;
1513     }
1514 
1515     while (offset < total_size) {
1516         int status1, status2;
1517 
1518         status1 = bdrv_block_status_above(bs1, NULL, offset,
1519                                           total_size1 - offset, &pnum1, NULL,
1520                                           NULL);
1521         if (status1 < 0) {
1522             ret = 3;
1523             error_report("Sector allocation test failed for %s", filename1);
1524             goto out;
1525         }
1526         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1527 
1528         status2 = bdrv_block_status_above(bs2, NULL, offset,
1529                                           total_size2 - offset, &pnum2, NULL,
1530                                           NULL);
1531         if (status2 < 0) {
1532             ret = 3;
1533             error_report("Sector allocation test failed for %s", filename2);
1534             goto out;
1535         }
1536         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1537 
         /* Only look at the range for which both status results hold */
1538         assert(pnum1 && pnum2);
1539         chunk = MIN(pnum1, pnum2);
1540 
1541         if (strict) {
1542             if (status1 != status2) {
1543                 ret = 1;
1544                 qprintf(quiet, "Strict mode: Offset %" PRId64
1545                         " block status mismatch!\n", offset);
1546                 goto out;
1547             }
1548         }
1549         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1550             /* nothing to do */
1551         } else if (allocated1 == allocated2) {
             /*
              * Same allocation state on both sides: the data is read and
              * compared only if both are allocated; nothing is read if
              * neither is.
              */
1552             if (allocated1) {
1553                 int64_t pnum;
1554 
1555                 chunk = MIN(chunk, IO_BUF_SIZE);
1556                 ret = blk_pread(blk1, offset, chunk, buf1, 0);
1557                 if (ret < 0) {
1558                     error_report("Error while reading offset %" PRId64
1559                                  " of %s: %s",
1560                                  offset, filename1, strerror(-ret));
1561                     ret = 4;
1562                     goto out;
1563                 }
1564                 ret = blk_pread(blk2, offset, chunk, buf2, 0);
1565                 if (ret < 0) {
1566                     error_report("Error while reading offset %" PRId64
1567                                  " of %s: %s",
1568                                  offset, filename2, strerror(-ret));
1569                     ret = 4;
1570                     goto out;
1571                 }
1572                 ret = compare_buffers(buf1, buf2, chunk, 0, &pnum);
                 /*
                  * ret != 0: the very first chunk differs, so the mismatch is
                  * at 'offset'. Otherwise pnum bytes matched and the first
                  * difference, if any, starts right after them.
                  */
1573                 if (ret || pnum != chunk) {
1574                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1575                             offset + (ret ? 0 : pnum));
1576                     ret = 1;
1577                     goto out;
1578                 }
1579             }
1580         } else {
             /*
              * Only one image is allocated here: that one has to read as
              * zeroes. buf1 serves as the read buffer for either image.
              */
1581             chunk = MIN(chunk, IO_BUF_SIZE);
1582             if (allocated1) {
1583                 ret = check_empty_sectors(blk1, offset, chunk,
1584                                           filename1, buf1, quiet);
1585             } else {
1586                 ret = check_empty_sectors(blk2, offset, chunk,
1587                                           filename2, buf1, quiet);
1588             }
1589             if (ret) {
1590                 goto out;
1591             }
1592         }
1593         offset += chunk;
1594         qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1595     }
1596 
     /*
      * Sizes differ (only reachable in non-strict mode): the part of the
      * larger image beyond the end of the smaller one must read as zeroes.
      */
1597     if (total_size1 != total_size2) {
1598         BlockBackend *blk_over;
1599         const char *filename_over;
1600 
1601         qprintf(quiet, "Warning: Image size mismatch!\n");
1602         if (total_size1 > total_size2) {
1603             blk_over = blk1;
1604             filename_over = filename1;
1605         } else {
1606             blk_over = blk2;
1607             filename_over = filename2;
1608         }
1609 
1610         while (offset < progress_base) {
1611             ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
1612                                           progress_base - offset, &chunk,
1613                                           NULL, NULL);
1614             if (ret < 0) {
1615                 ret = 3;
1616                 error_report("Sector allocation test failed for %s",
1617                              filename_over);
1618                 goto out;
1619 
1620             }
             /* Ranges that are unallocated or flagged as zero are not read */
1621             if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
1622                 chunk = MIN(chunk, IO_BUF_SIZE);
1623                 ret = check_empty_sectors(blk_over, offset, chunk,
1624                                           filename_over, buf1, quiet);
1625                 if (ret) {
1626                     goto out;
1627                 }
1628             }
1629             offset += chunk;
1630             qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1631         }
1632     }
1633 
1634     qprintf(quiet, "Images are identical.\n");
1635     ret = 0;
1636 
     /* Cleanup labels: each one undoes only what had been set up before the jump */
1637 out:
1638     qemu_vfree(buf1);
1639     qemu_vfree(buf2);
1640     blk_unref(blk2);
1641 out2:
1642     blk_unref(blk1);
1643 out3:
1644     qemu_progress_end();
1645     return ret;
1646 }
1647 
1648 /* Convenience wrapper around qmp_block_dirty_bitmap_merge */
1649 static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name,
1650                                   const char *src_node, const char *src_name,
1651                                   Error **errp)
1652 {
1653     BlockDirtyBitmapOrStr *merge_src;
1654     BlockDirtyBitmapOrStrList *list = NULL;
1655 
1656     merge_src = g_new0(BlockDirtyBitmapOrStr, 1);
1657     merge_src->type = QTYPE_QDICT;
1658     merge_src->u.external.node = g_strdup(src_node);
1659     merge_src->u.external.name = g_strdup(src_name);
1660     QAPI_LIST_PREPEND(list, merge_src);
1661     qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp);
1662     qapi_free_BlockDirtyBitmapOrStrList(list);
1663 }
1664 
/*
 * Classification of a source range, as stored in ImgConvertState.status by
 * convert_iteration_sectors().
 */
1665 enum ImgConvertBlockStatus {
1666     BLK_DATA,           /* reported as data, or unallocated with no target backing */
1667     BLK_ZERO,           /* reported as zero, not past the known end of the target backing */
1668     BLK_BACKING_FILE,   /* zero past the end of the target backing, or
                            * neither zero nor data while the target has a
                            * backing file */
1669 };
1670 
/* Number of elements in the co[] and wait_sector_num[] arrays of ImgConvertState */
1671 #define MAX_COROUTINES 16
1672 #define CONVERT_THROTTLE_GROUP "img_convert"
1673 
/*
 * State of an image conversion, shared by the convert_*() helpers.
 * Only the fields whose use is visible in the nearby helpers are annotated.
 */
1674 typedef struct ImgConvertState {
1675     BlockBackend **src;             /* source images, src_num entries */
1676     int64_t *src_sectors;           /* length of each source, in sectors */
1677     int *src_alignment;             /* per-source alignment (in sectors) for sector_next_status */
1678     int src_num;                    /* number of entries in the src* arrays */
1679     int64_t total_sectors;          /* sector numbers handled are below this value */
1680     int64_t allocated_sectors;
1681     int64_t allocated_done;
1682     int64_t sector_num;
1683     int64_t wr_offs;
1684     enum ImgConvertBlockStatus status;  /* status found by the last block status query */
1685     int64_t sector_next_status;     /* first sector 'status' is no longer valid for */
1686     BlockBackend *target;
1687     bool has_zero_init;
1688     bool compressed;                /* ranges are rounded to whole clusters */
1689     bool target_is_new;
1690     bool target_has_backing;
1691     int64_t target_backing_sectors; /* negative if unknown */
1692     bool wr_in_order;
1693     bool copy_range;
1694     bool salvage;                   /* turn block status errors into warnings and go on */
1695     bool quiet;                     /* suppress those warnings */
1696     int min_sparse;
1697     int alignment;
1698     size_t cluster_sectors;         /* cluster size in sectors, used when 'compressed' */
1699     size_t buf_sectors;             /* upper bound for one BLK_DATA range, in sectors */
1700     long num_coroutines;
1701     int running_coroutines;
1702     Coroutine *co[MAX_COROUTINES];
1703     int64_t wait_sector_num[MAX_COROUTINES];
1704     CoMutex lock;
1705     int ret;
1706 } ImgConvertState;
1707 
1708 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1709                                 int *src_cur, int64_t *src_cur_offset)
1710 {
1711     *src_cur = 0;
1712     *src_cur_offset = 0;
1713     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1714         *src_cur_offset += s->src_sectors[*src_cur];
1715         (*src_cur)++;
1716         assert(*src_cur < s->src_num);
1717     }
1718 }
1719 
/*
 * Work out how many sectors, starting at @sector_num, can be handled in one
 * step and what kind of range they are.
 *
 * The classification is kept in s->status and stays valid up to
 * s->sector_next_status; the source's block status is queried again only
 * once @sector_num has reached that point.
 *
 * Returns the number of sectors, or the negative error returned by
 * bdrv_block_status_above() when the query fails and s->salvage is not set.
 */
1720 static int coroutine_mixed_fn GRAPH_RDLOCK
1721 convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1722 {
1723     int64_t src_cur_offset;
1724     int ret, n, src_cur;
1725     bool post_backing_zero = false;
1726 
1727     convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1728 
1729     assert(s->total_sectors > sector_num);
1730     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1731 
1732     if (s->target_backing_sectors >= 0) {
1733         if (sector_num >= s->target_backing_sectors) {
1734             post_backing_zero = true;
1735         } else if (sector_num + n > s->target_backing_sectors) {
1736             /* Split requests around target_backing_sectors (because
1737              * starting from there, zeros are handled differently) */
1738             n = s->target_backing_sectors - sector_num;
1739         }
1740     }
1741 
     /* The cached status does not cover sector_num any more: query again */
1742     if (s->sector_next_status <= sector_num) {
1743         uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
1744         int64_t count;
1745         int tail;
1746         BlockDriverState *src_bs = blk_bs(s->src[src_cur]);
1747         BlockDriverState *base;
1748 
         /* 'base' is where the block status query stops; NULL without target backing */
1749         if (s->target_has_backing) {
1750             base = bdrv_cow_bs(bdrv_skip_filters(src_bs));
1751         } else {
1752             base = NULL;
1753         }
1754 
         /*
          * In salvage mode a failing query is retried on a quarter of the
          * range until a single sector is left, which is then assumed to be
          * data.
          */
1755         do {
1756             count = n * BDRV_SECTOR_SIZE;
1757 
1758             ret = bdrv_block_status_above(src_bs, base, offset, count, &count,
1759                                           NULL, NULL);
1760 
1761             if (ret < 0) {
1762                 if (s->salvage) {
1763                     if (n == 1) {
1764                         if (!s->quiet) {
1765                             warn_report("error while reading block status at "
1766                                         "offset %" PRIu64 ": %s", offset,
1767                                         strerror(-ret));
1768                         }
1769                         /* Just try to read the data, then */
1770                         ret = BDRV_BLOCK_DATA;
1771                         count = BDRV_SECTOR_SIZE;
1772                     } else {
1773                         /* Retry on a shorter range */
1774                         n = DIV_ROUND_UP(n, 4);
1775                     }
1776                 } else {
1777                     error_report("error while reading block status at offset "
1778                                  "%" PRIu64 ": %s", offset, strerror(-ret));
1779                     return ret;
1780                 }
1781             }
1782         } while (ret < 0);
1783 
         /* 'count' now holds the number of bytes the status applies to */
1784         n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
1785 
1786         /*
1787          * Avoid that s->sector_next_status becomes unaligned to the source
1788          * request alignment and/or cluster size to avoid unnecessary read
1789          * cycles.
1790          */
1791         tail = (sector_num - src_cur_offset + n) % s->src_alignment[src_cur];
         /* Only trim the unaligned tail if something is left afterwards */
1792         if (n > tail) {
1793             n -= tail;
1794         }
1795 
1796         if (ret & BDRV_BLOCK_ZERO) {
1797             s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
1798         } else if (ret & BDRV_BLOCK_DATA) {
1799             s->status = BLK_DATA;
1800         } else {
1801             s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
1802         }
1803 
1804         s->sector_next_status = sector_num + n;
1805     }
1806 
     /* Never go beyond the range the status is known for */
1807     n = MIN(n, s->sector_next_status - sector_num);
1808     if (s->status == BLK_DATA) {
1809         n = MIN(n, s->buf_sectors);
1810     }
1811 
1812     /* We need to write complete clusters for compressed images, so if an
1813      * unallocated area is shorter than that, we must consider the whole
1814      * cluster allocated. */
1815     if (s->compressed) {
1816         if (n < s->cluster_sectors) {
1817             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1818             s->status = BLK_DATA;
1819         } else {
1820             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1821         }
1822     }
1823 
1824     return n;
1825 }
1826 
1827 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1828                                         int nb_sectors, uint8_t *buf)
1829 {
1830     uint64_t single_read_until = 0;
1831     int n, ret;
1832 
1833     assert(nb_sectors <= s->buf_sectors);
1834     while (nb_sectors > 0) {
1835         BlockBackend *blk;
1836         int src_cur;
1837         int64_t bs_sectors, src_cur_offset;
1838         uint64_t offset;
1839 
1840         /* In the case of compression with multiple source files, we can get a
1841          * nb_sectors that spreads into the next part. So we must be able to
1842          * read across multiple BDSes for one convert_read() call. */
1843         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1844         blk = s->src[src_cur];
1845         bs_sectors = s->src_sectors[src_cur];
1846 
1847         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1848 
1849         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1850         if (single_read_until > offset) {
1851             n = 1;
1852         }
1853 
1854         ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1855         if (ret < 0) {
1856             if (s->salvage) {
1857                 if (n > 1) {
1858                     single_read_until = offset + (n << BDRV_SECTOR_BITS);
1859                     continue;
1860                 } else {
1861                     if (!s->quiet) {
1862                         warn_report("error while reading offset %" PRIu64
1863                                     ": %s", offset, strerror(-ret));
1864                     }
1865                     memset(buf, 0, BDRV_SECTOR_SIZE);
1866                 }
1867             } else {
1868                 return ret;
1869             }
1870         }
1871 
1872         sector_num += n;
1873         nb_sectors -= n;
1874         buf += n * BDRV_SECTOR_SIZE;
1875     }
1876 
1877     return 0;
1878 }
1879 
1880 
1881 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1882                                          int nb_sectors, uint8_t *buf,
1883                                          enum ImgConvertBlockStatus status)
1884 {
1885     int ret;
1886 
1887     while (nb_sectors > 0) {
1888         int n = nb_sectors;
1889         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1890 
1891         switch (status) {
1892         case BLK_BACKING_FILE:
1893             /* If we have a backing file, leave clusters unallocated that are
1894              * unallocated in the source image, so that the backing file is
1895              * visible at the respective offset. */
1896             assert(s->target_has_backing);
1897             break;
1898 
1899         case BLK_DATA:
1900             /* If we're told to keep the target fully allocated (-S 0) or there
1901              * is real non-zero data, we must write it. Otherwise we can treat
1902              * it as zero sectors.
1903              * Compressed clusters need to be written as a whole, so in that
1904              * case we can only save the write if the buffer is completely
1905              * zeroed. */
1906             if (!s->min_sparse ||
1907                 (!s->compressed &&
1908                  is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1909                                           sector_num, s->alignment)) ||
1910                 (s->compressed &&
1911                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1912             {
1913                 ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1914                                     n << BDRV_SECTOR_BITS, buf, flags);
1915                 if (ret < 0) {
1916                     return ret;
1917                 }
1918                 break;
1919             }
1920             /* fall-through */
1921 
1922         case BLK_ZERO:
1923             if (s->has_zero_init) {
1924                 assert(!s->target_has_backing);
1925                 break;
1926             }
1927             ret = blk_co_pwrite_zeroes(s->target,
1928                                        sector_num << BDRV_SECTOR_BITS,
1929                                        n << BDRV_SECTOR_BITS,
1930                                        BDRV_REQ_MAY_UNMAP);
1931             if (ret < 0) {
1932                 return ret;
1933             }
1934             break;
1935         }
1936 
1937         sector_num += n;
1938         nb_sectors -= n;
1939         buf += n * BDRV_SECTOR_SIZE;
1940     }
1941 
1942     return 0;
1943 }
1944 
1945 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1946                                               int nb_sectors)
1947 {
1948     int n, ret;
1949 
1950     while (nb_sectors > 0) {
1951         BlockBackend *blk;
1952         int src_cur;
1953         int64_t bs_sectors, src_cur_offset;
1954         int64_t offset;
1955 
1956         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1957         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1958         blk = s->src[src_cur];
1959         bs_sectors = s->src_sectors[src_cur];
1960 
1961         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1962 
1963         ret = blk_co_copy_range(blk, offset, s->target,
1964                                 sector_num << BDRV_SECTOR_BITS,
1965                                 n << BDRV_SECTOR_BITS, 0, 0);
1966         if (ret < 0) {
1967             return ret;
1968         }
1969 
1970         sector_num += n;
1971         nb_sectors -= n;
1972     }
1973     return 0;
1974 }
1975 
1976 static void coroutine_fn convert_co_do_copy(void *opaque)
1977 {
1978     ImgConvertState *s = opaque;
1979     uint8_t *buf = NULL;
1980     int ret, i;
1981     int index = -1;
1982 
1983     for (i = 0; i < s->num_coroutines; i++) {
1984         if (s->co[i] == qemu_coroutine_self()) {
1985             index = i;
1986             break;
1987         }
1988     }
1989     assert(index >= 0);
1990 
1991     s->running_coroutines++;
1992     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1993 
1994     while (1) {
1995         int n;
1996         int64_t sector_num;
1997         enum ImgConvertBlockStatus status;
1998         bool copy_range;
1999 
2000         qemu_co_mutex_lock(&s->lock);
2001         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
2002             qemu_co_mutex_unlock(&s->lock);
2003             break;
2004         }
2005         WITH_GRAPH_RDLOCK_GUARD() {
2006             n = convert_iteration_sectors(s, s->sector_num);
2007         }
2008         if (n < 0) {
2009             qemu_co_mutex_unlock(&s->lock);
2010             s->ret = n;
2011             break;
2012         }
2013         /* save current sector and allocation status to local variables */
2014         sector_num = s->sector_num;
2015         status = s->status;
2016         if (!s->min_sparse && s->status == BLK_ZERO) {
2017             n = MIN(n, s->buf_sectors);
2018         }
2019         /* increment global sector counter so that other coroutines can
2020          * already continue reading beyond this request */
2021         s->sector_num += n;
2022         qemu_co_mutex_unlock(&s->lock);
2023 
2024         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
2025             s->allocated_done += n;
2026             qemu_progress_print(100.0 * s->allocated_done /
2027                                         s->allocated_sectors, 0);
2028         }
2029 
2030 retry:
2031         copy_range = s->copy_range && s->status == BLK_DATA;
2032         if (status == BLK_DATA && !copy_range) {
2033             ret = convert_co_read(s, sector_num, n, buf);
2034             if (ret < 0) {
2035                 error_report("error while reading at byte %lld: %s",
2036                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2037                 s->ret = ret;
2038             }
2039         } else if (!s->min_sparse && status == BLK_ZERO) {
2040             status = BLK_DATA;
2041             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
2042         }
2043 
2044         if (s->wr_in_order) {
2045             /* keep writes in order */
2046             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
2047                 s->wait_sector_num[index] = sector_num;
2048                 qemu_coroutine_yield();
2049             }
2050             s->wait_sector_num[index] = -1;
2051         }
2052 
2053         if (s->ret == -EINPROGRESS) {
2054             if (copy_range) {
2055                 WITH_GRAPH_RDLOCK_GUARD() {
2056                     ret = convert_co_copy_range(s, sector_num, n);
2057                 }
2058                 if (ret) {
2059                     s->copy_range = false;
2060                     goto retry;
2061                 }
2062             } else {
2063                 ret = convert_co_write(s, sector_num, n, buf, status);
2064             }
2065             if (ret < 0) {
2066                 error_report("error while writing at byte %lld: %s",
2067                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2068                 s->ret = ret;
2069             }
2070         }
2071 
2072         if (s->wr_in_order) {
2073             /* reenter the coroutine that might have waited
2074              * for this write to complete */
2075             s->wr_offs = sector_num + n;
2076             for (i = 0; i < s->num_coroutines; i++) {
2077                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
2078                     /*
2079                      * A -> B -> A cannot occur because A has
2080                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
2081                      * B will never enter A during this time window.
2082                      */
2083                     qemu_coroutine_enter(s->co[i]);
2084                     break;
2085                 }
2086             }
2087         }
2088     }
2089 
2090     qemu_vfree(buf);
2091     s->co[index] = NULL;
2092     s->running_coroutines--;
2093     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
2094         /* the convert job finished successfully */
2095         s->ret = 0;
2096     }
2097 }
2098 
2099 static int convert_do_copy(ImgConvertState *s)
2100 {
2101     int ret, i, n;
2102     int64_t sector_num = 0;
2103 
2104     /* Check whether we have zero initialisation or can get it efficiently */
2105     if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
2106         !s->target_has_backing) {
2107         bdrv_graph_rdlock_main_loop();
2108         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
2109         bdrv_graph_rdunlock_main_loop();
2110     }
2111 
2112     /* Allocate buffer for copied data. For compressed images, only one cluster
2113      * can be copied at a time. */
2114     if (s->compressed) {
2115         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2116             error_report("invalid cluster size");
2117             return -EINVAL;
2118         }
2119         s->buf_sectors = s->cluster_sectors;
2120     }
2121 
2122     while (sector_num < s->total_sectors) {
2123         bdrv_graph_rdlock_main_loop();
2124         n = convert_iteration_sectors(s, sector_num);
2125         bdrv_graph_rdunlock_main_loop();
2126         if (n < 0) {
2127             return n;
2128         }
2129         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2130         {
2131             s->allocated_sectors += n;
2132         }
2133         sector_num += n;
2134     }
2135 
2136     /* Do the copy */
2137     s->sector_next_status = 0;
2138     s->ret = -EINPROGRESS;
2139 
2140     qemu_co_mutex_init(&s->lock);
2141     for (i = 0; i < s->num_coroutines; i++) {
2142         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2143         s->wait_sector_num[i] = -1;
2144         qemu_coroutine_enter(s->co[i]);
2145     }
2146 
2147     while (s->running_coroutines) {
2148         main_loop_wait(false);
2149     }
2150 
2151     if (s->compressed && !s->ret) {
2152         /* signal EOF to align */
2153         ret = blk_pwrite_compressed(s->target, 0, 0, NULL);
2154         if (ret < 0) {
2155             return ret;
2156         }
2157     }
2158 
2159     return s->ret;
2160 }
2161 
2162 /* Check that bitmaps can be copied, or output an error */
2163 static int convert_check_bitmaps(BlockDriverState *src, bool skip_broken)
2164 {
2165     BdrvDirtyBitmap *bm;
2166 
2167     if (!bdrv_supports_persistent_dirty_bitmap(src)) {
2168         error_report("Source lacks bitmap support");
2169         return -1;
2170     }
2171     FOR_EACH_DIRTY_BITMAP(src, bm) {
2172         if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2173             continue;
2174         }
2175         if (!skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2176             error_report("Cannot copy inconsistent bitmap '%s'",
2177                          bdrv_dirty_bitmap_name(bm));
2178             error_printf("Try --skip-broken-bitmaps, or "
2179                          "use 'qemu-img bitmap --remove' to delete it\n");
2180             return -1;
2181         }
2182     }
2183     return 0;
2184 }
2185 
2186 static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst,
2187                                 bool skip_broken)
2188 {
2189     BdrvDirtyBitmap *bm;
2190     Error *err = NULL;
2191 
2192     FOR_EACH_DIRTY_BITMAP(src, bm) {
2193         const char *name;
2194 
2195         if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2196             continue;
2197         }
2198         name = bdrv_dirty_bitmap_name(bm);
2199         if (skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2200             warn_report("Skipping inconsistent bitmap '%s'", name);
2201             continue;
2202         }
2203         qmp_block_dirty_bitmap_add(dst->node_name, name,
2204                                    true, bdrv_dirty_bitmap_granularity(bm),
2205                                    true, true,
2206                                    true, !bdrv_dirty_bitmap_enabled(bm),
2207                                    &err);
2208         if (err) {
2209             error_reportf_err(err, "Failed to create bitmap %s: ", name);
2210             return -1;
2211         }
2212 
2213         do_dirty_bitmap_merge(dst->node_name, name, src->node_name, name,
2214                               &err);
2215         if (err) {
2216             error_reportf_err(err, "Failed to populate bitmap %s: ", name);
2217             qmp_block_dirty_bitmap_remove(dst->node_name, name, NULL);
2218             return -1;
2219         }
2220     }
2221 
2222     return 0;
2223 }
2224 
2225 #define MAX_BUF_SECTORS 32768
2226 
2227 static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
2228 {
2229     ThrottleConfig cfg;
2230 
2231     throttle_config_init(&cfg);
2232     cfg.buckets[THROTTLE_BPS_WRITE].avg = rate_limit;
2233 
2234     blk_io_limits_enable(blk, CONVERT_THROTTLE_GROUP);
2235     blk_set_io_limits(blk, &cfg);
2236 }
2237 
2238 static int img_convert(int argc, char **argv)
2239 {
2240     int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
2241     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2242                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2243                *out_filename, *out_baseimg_param, *snapshot_name = NULL,
2244                *backing_fmt = NULL;
2245     BlockDriver *drv = NULL, *proto_drv = NULL;
2246     BlockDriverInfo bdi;
2247     BlockDriverState *out_bs;
2248     QemuOpts *opts = NULL, *sn_opts = NULL;
2249     QemuOptsList *create_opts = NULL;
2250     QDict *open_opts = NULL;
2251     char *options = NULL;
2252     Error *local_err = NULL;
2253     bool writethrough, src_writethrough, image_opts = false,
2254          skip_create = false, progress = false, tgt_image_opts = false;
2255     int64_t ret = -EINVAL;
2256     bool force_share = false;
2257     bool explict_min_sparse = false;
2258     bool bitmaps = false;
2259     bool skip_broken = false;
2260     int64_t rate_limit = 0;
2261 
2262     ImgConvertState s = (ImgConvertState) {
2263         /* Need at least 4k of zeros for sparse detection */
2264         .min_sparse         = 8,
2265         .copy_range         = false,
2266         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2267         .wr_in_order        = true,
2268         .num_coroutines     = 8,
2269     };
2270 
2271     for(;;) {
2272         static const struct option long_options[] = {
2273             {"help", no_argument, 0, 'h'},
2274             {"object", required_argument, 0, OPTION_OBJECT},
2275             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2276             {"force-share", no_argument, 0, 'U'},
2277             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2278             {"salvage", no_argument, 0, OPTION_SALVAGE},
2279             {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2280             {"bitmaps", no_argument, 0, OPTION_BITMAPS},
2281             {"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN},
2282             {0, 0, 0, 0}
2283         };
2284         c = getopt_long(argc, argv, ":hf:O:B:CcF:o:l:S:pt:T:qnm:WUr:",
2285                         long_options, NULL);
2286         if (c == -1) {
2287             break;
2288         }
2289         switch(c) {
2290         case ':':
2291             missing_argument(argv[optind - 1]);
2292             break;
2293         case '?':
2294             unrecognized_option(argv[optind - 1]);
2295             break;
2296         case 'h':
2297             help();
2298             break;
2299         case 'f':
2300             fmt = optarg;
2301             break;
2302         case 'O':
2303             out_fmt = optarg;
2304             break;
2305         case 'B':
2306             out_baseimg = optarg;
2307             break;
2308         case 'C':
2309             s.copy_range = true;
2310             break;
2311         case 'c':
2312             s.compressed = true;
2313             break;
2314         case 'F':
2315             backing_fmt = optarg;
2316             break;
2317         case 'o':
2318             if (accumulate_options(&options, optarg) < 0) {
2319                 goto fail_getopt;
2320             }
2321             break;
2322         case 'l':
2323             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2324                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2325                                                   optarg, false);
2326                 if (!sn_opts) {
2327                     error_report("Failed in parsing snapshot param '%s'",
2328                                  optarg);
2329                     goto fail_getopt;
2330                 }
2331             } else {
2332                 snapshot_name = optarg;
2333             }
2334             break;
2335         case 'S':
2336         {
2337             int64_t sval;
2338 
2339             sval = cvtnum("buffer size for sparse output", optarg);
2340             if (sval < 0) {
2341                 goto fail_getopt;
2342             } else if (!QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2343                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2344                 error_report("Invalid buffer size for sparse output specified. "
2345                     "Valid sizes are multiples of %llu up to %llu. Select "
2346                     "0 to disable sparse detection (fully allocates output).",
2347                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2348                 goto fail_getopt;
2349             }
2350 
2351             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2352             explict_min_sparse = true;
2353             break;
2354         }
2355         case 'p':
2356             progress = true;
2357             break;
2358         case 't':
2359             cache = optarg;
2360             break;
2361         case 'T':
2362             src_cache = optarg;
2363             break;
2364         case 'q':
2365             s.quiet = true;
2366             break;
2367         case 'n':
2368             skip_create = true;
2369             break;
2370         case 'm':
2371             if (qemu_strtol(optarg, NULL, 0, &s.num_coroutines) ||
2372                 s.num_coroutines < 1 || s.num_coroutines > MAX_COROUTINES) {
2373                 error_report("Invalid number of coroutines. Allowed number of"
2374                              " coroutines is between 1 and %d", MAX_COROUTINES);
2375                 goto fail_getopt;
2376             }
2377             break;
2378         case 'W':
2379             s.wr_in_order = false;
2380             break;
2381         case 'U':
2382             force_share = true;
2383             break;
2384         case 'r':
2385             rate_limit = cvtnum("rate limit", optarg);
2386             if (rate_limit < 0) {
2387                 goto fail_getopt;
2388             }
2389             break;
2390         case OPTION_OBJECT:
2391             user_creatable_process_cmdline(optarg);
2392             break;
2393         case OPTION_IMAGE_OPTS:
2394             image_opts = true;
2395             break;
2396         case OPTION_SALVAGE:
2397             s.salvage = true;
2398             break;
2399         case OPTION_TARGET_IMAGE_OPTS:
2400             tgt_image_opts = true;
2401             break;
2402         case OPTION_TARGET_IS_ZERO:
2403             /*
2404              * The user asserting that the target is blank has the
2405              * same effect as the target driver supporting zero
2406              * initialisation.
2407              */
2408             s.has_zero_init = true;
2409             break;
2410         case OPTION_BITMAPS:
2411             bitmaps = true;
2412             break;
2413         case OPTION_SKIP_BROKEN:
2414             skip_broken = true;
2415             break;
2416         }
2417     }
2418 
2419     if (!out_fmt && !tgt_image_opts) {
2420         out_fmt = "raw";
2421     }
2422 
2423     if (skip_broken && !bitmaps) {
2424         error_report("Use of --skip-broken-bitmaps requires --bitmaps");
2425         goto fail_getopt;
2426     }
2427 
2428     if (s.compressed && s.copy_range) {
2429         error_report("Cannot enable copy offloading when -c is used");
2430         goto fail_getopt;
2431     }
2432 
2433     if (explict_min_sparse && s.copy_range) {
2434         error_report("Cannot enable copy offloading when -S is used");
2435         goto fail_getopt;
2436     }
2437 
2438     if (s.copy_range && s.salvage) {
2439         error_report("Cannot use copy offloading in salvaging mode");
2440         goto fail_getopt;
2441     }
2442 
2443     if (tgt_image_opts && !skip_create) {
2444         error_report("--target-image-opts requires use of -n flag");
2445         goto fail_getopt;
2446     }
2447 
2448     if (skip_create && options) {
2449         error_report("-o has no effect when skipping image creation");
2450         goto fail_getopt;
2451     }
2452 
2453     if (s.has_zero_init && !skip_create) {
2454         error_report("--target-is-zero requires use of -n flag");
2455         goto fail_getopt;
2456     }
2457 
2458     s.src_num = argc - optind - 1;
2459     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2460 
2461     if (options && has_help_option(options)) {
2462         if (out_fmt) {
2463             ret = print_block_option_help(out_filename, out_fmt);
2464             goto fail_getopt;
2465         } else {
2466             error_report("Option help requires a format be specified");
2467             goto fail_getopt;
2468         }
2469     }
2470 
2471     if (s.src_num < 1) {
2472         error_report("Must specify image file name");
2473         goto fail_getopt;
2474     }
2475 
2476     /* ret is still -EINVAL until here */
2477     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2478     if (ret < 0) {
2479         error_report("Invalid source cache option: %s", src_cache);
2480         goto fail_getopt;
2481     }
2482 
2483     /* Initialize before goto out */
2484     if (s.quiet) {
2485         progress = false;
2486     }
2487     qemu_progress_init(progress, 1.0);
2488     qemu_progress_print(0, 100);
2489 
2490     s.src = g_new0(BlockBackend *, s.src_num);
2491     s.src_sectors = g_new(int64_t, s.src_num);
2492     s.src_alignment = g_new(int, s.src_num);
2493 
2494     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2495         BlockDriverState *src_bs;
2496         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2497                                fmt, src_flags, src_writethrough, s.quiet,
2498                                force_share);
2499         if (!s.src[bs_i]) {
2500             ret = -1;
2501             goto out;
2502         }
2503         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2504         if (s.src_sectors[bs_i] < 0) {
2505             error_report("Could not get size of %s: %s",
2506                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2507             ret = -1;
2508             goto out;
2509         }
2510         src_bs = blk_bs(s.src[bs_i]);
2511         s.src_alignment[bs_i] = DIV_ROUND_UP(src_bs->bl.request_alignment,
2512                                              BDRV_SECTOR_SIZE);
2513         if (!bdrv_get_info(src_bs, &bdi)) {
2514             s.src_alignment[bs_i] = MAX(s.src_alignment[bs_i],
2515                                         bdi.cluster_size / BDRV_SECTOR_SIZE);
2516         }
2517         s.total_sectors += s.src_sectors[bs_i];
2518     }
2519 
2520     if (sn_opts) {
2521         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2522                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2523                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2524                                &local_err);
2525     } else if (snapshot_name != NULL) {
2526         if (s.src_num > 1) {
2527             error_report("No support for concatenating multiple snapshot");
2528             ret = -1;
2529             goto out;
2530         }
2531 
2532         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2533                                              &local_err);
2534     }
2535     if (local_err) {
2536         error_reportf_err(local_err, "Failed to load snapshot: ");
2537         ret = -1;
2538         goto out;
2539     }
2540 
2541     if (!skip_create) {
2542         /* Find driver and parse its options */
2543         drv = bdrv_find_format(out_fmt);
2544         if (!drv) {
2545             error_report("Unknown file format '%s'", out_fmt);
2546             ret = -1;
2547             goto out;
2548         }
2549 
2550         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2551         if (!proto_drv) {
2552             error_report_err(local_err);
2553             ret = -1;
2554             goto out;
2555         }
2556 
2557         if (!drv->create_opts) {
2558             error_report("Format driver '%s' does not support image creation",
2559                          drv->format_name);
2560             ret = -1;
2561             goto out;
2562         }
2563 
2564         if (!proto_drv->create_opts) {
2565             error_report("Protocol driver '%s' does not support image creation",
2566                          proto_drv->format_name);
2567             ret = -1;
2568             goto out;
2569         }
2570 
2571         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2572         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2573 
2574         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2575         if (options) {
2576             if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
2577                 error_report_err(local_err);
2578                 ret = -1;
2579                 goto out;
2580             }
2581         }
2582 
2583         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
2584                             s.total_sectors * BDRV_SECTOR_SIZE, &error_abort);
2585         ret = add_old_style_options(out_fmt, opts, out_baseimg, backing_fmt);
2586         if (ret < 0) {
2587             goto out;
2588         }
2589     }
2590 
2591     /* Get backing file name if -o backing_file was used */
2592     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2593     if (out_baseimg_param) {
2594         out_baseimg = out_baseimg_param;
2595     }
2596     s.target_has_backing = (bool) out_baseimg;
2597 
2598     if (s.has_zero_init && s.target_has_backing) {
2599         error_report("Cannot use --target-is-zero when the destination "
2600                      "image has a backing file");
2601         goto out;
2602     }
2603 
2604     if (s.src_num > 1 && out_baseimg) {
2605         error_report("Having a backing file for the target makes no sense when "
2606                      "concatenating multiple input images");
2607         ret = -1;
2608         goto out;
2609     }
2610 
2611     if (out_baseimg_param) {
2612         if (!qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT)) {
2613             error_report("Use of backing file requires explicit "
2614                          "backing format");
2615             ret = -1;
2616             goto out;
2617         }
2618     }
2619 
2620     /* Check if compression is supported */
2621     if (s.compressed) {
2622         bool encryption =
2623             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2624         const char *encryptfmt =
2625             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2626         const char *preallocation =
2627             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2628 
2629         if (drv && !block_driver_can_compress(drv)) {
2630             error_report("Compression not supported for this file format");
2631             ret = -1;
2632             goto out;
2633         }
2634 
2635         if (encryption || encryptfmt) {
2636             error_report("Compression and encryption not supported at "
2637                          "the same time");
2638             ret = -1;
2639             goto out;
2640         }
2641 
2642         if (preallocation
2643             && strcmp(preallocation, "off"))
2644         {
2645             error_report("Compression and preallocation not supported at "
2646                          "the same time");
2647             ret = -1;
2648             goto out;
2649         }
2650     }
2651 
2652     /* Determine if bitmaps need copying */
2653     if (bitmaps) {
2654         if (s.src_num > 1) {
2655             error_report("Copying bitmaps only possible with single source");
2656             ret = -1;
2657             goto out;
2658         }
2659         ret = convert_check_bitmaps(blk_bs(s.src[0]), skip_broken);
2660         if (ret < 0) {
2661             goto out;
2662         }
2663     }
2664 
2665     /*
2666      * The later open call will need any decryption secrets, and
2667      * bdrv_create() will purge "opts", so extract them now before
2668      * they are lost.
2669      */
2670     if (!skip_create) {
2671         open_opts = qdict_new();
2672         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2673 
2674         /* Create the new image */
2675         ret = bdrv_create(drv, out_filename, opts, &local_err);
2676         if (ret < 0) {
2677             error_reportf_err(local_err, "%s: error while converting %s: ",
2678                               out_filename, out_fmt);
2679             goto out;
2680         }
2681     }
2682 
2683     s.target_is_new = !skip_create;
2684 
2685     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2686     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2687     if (ret < 0) {
2688         error_report("Invalid cache option: %s", cache);
2689         goto out;
2690     }
2691 
2692     if (flags & BDRV_O_NOCACHE) {
2693         /*
2694          * If we open the target with O_DIRECT, it may be necessary to
2695          * extend its size to align to the physical sector size.
2696          */
2697         flags |= BDRV_O_RESIZE;
2698     }
2699 
2700     if (skip_create) {
2701         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2702                             flags, writethrough, s.quiet, false);
2703     } else {
2704         /* TODO ultimately we should allow --target-image-opts
2705          * to be used even when -n is not given.
2706          * That has to wait for bdrv_create to be improved
2707          * to allow filenames in option syntax
2708          */
2709         s.target = img_open_file(out_filename, open_opts, out_fmt,
2710                                  flags, writethrough, s.quiet, false);
2711         open_opts = NULL; /* blk_new_open will have freed it */
2712     }
2713     if (!s.target) {
2714         ret = -1;
2715         goto out;
2716     }
2717     out_bs = blk_bs(s.target);
2718 
2719     if (bitmaps && !bdrv_supports_persistent_dirty_bitmap(out_bs)) {
2720         error_report("Format driver '%s' does not support bitmaps",
2721                      out_bs->drv->format_name);
2722         ret = -1;
2723         goto out;
2724     }
2725 
2726     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2727         error_report("Compression not supported for this file format");
2728         ret = -1;
2729         goto out;
2730     }
2731 
2732     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2733      * or discard_alignment of the out_bs is greater. Limit to
2734      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2735     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2736                         MAX(s.buf_sectors,
2737                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2738                                 out_bs->bl.pdiscard_alignment >>
2739                                 BDRV_SECTOR_BITS)));
2740 
2741     /* try to align the write requests to the destination to avoid unnecessary
2742      * RMW cycles. */
2743     s.alignment = MAX(pow2floor(s.min_sparse),
2744                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2745                                    BDRV_SECTOR_SIZE));
2746     assert(is_power_of_2(s.alignment));
2747 
2748     if (skip_create) {
2749         int64_t output_sectors = blk_nb_sectors(s.target);
2750         if (output_sectors < 0) {
2751             error_report("unable to get output image length: %s",
2752                          strerror(-output_sectors));
2753             ret = -1;
2754             goto out;
2755         } else if (output_sectors < s.total_sectors) {
2756             error_report("output file is smaller than input file");
2757             ret = -1;
2758             goto out;
2759         }
2760     }
2761 
2762     if (s.target_has_backing && s.target_is_new) {
2763         /* Errors are treated as "backing length unknown" (which means
2764          * s.target_backing_sectors has to be negative, which it will
2765          * be automatically).  The backing file length is used only
2766          * for optimizations, so such a case is not fatal. */
2767         bdrv_graph_rdlock_main_loop();
2768         s.target_backing_sectors =
2769             bdrv_nb_sectors(bdrv_backing_chain_next(out_bs));
2770         bdrv_graph_rdunlock_main_loop();
2771     } else {
2772         s.target_backing_sectors = -1;
2773     }
2774 
2775     ret = bdrv_get_info(out_bs, &bdi);
2776     if (ret < 0) {
2777         if (s.compressed) {
2778             error_report("could not get block driver info");
2779             goto out;
2780         }
2781     } else {
2782         s.compressed = s.compressed || bdi.needs_compressed_writes;
2783         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2784     }
2785 
2786     if (rate_limit) {
2787         set_rate_limit(s.target, rate_limit);
2788     }
2789 
2790     ret = convert_do_copy(&s);
2791 
2792     /* Now copy the bitmaps */
2793     if (bitmaps && ret == 0) {
2794         ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs, skip_broken);
2795     }
2796 
2797 out:
2798     if (!ret) {
2799         qemu_progress_print(100, 0);
2800     }
2801     qemu_progress_end();
2802     qemu_opts_del(opts);
2803     qemu_opts_free(create_opts);
2804     qobject_unref(open_opts);
2805     blk_unref(s.target);
2806     if (s.src) {
2807         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2808             blk_unref(s.src[bs_i]);
2809         }
2810         g_free(s.src);
2811     }
2812     g_free(s.src_sectors);
2813     g_free(s.src_alignment);
2814 fail_getopt:
2815     qemu_opts_del(sn_opts);
2816     g_free(options);
2817 
2818     return !!ret;
2819 }
2820 
2821 
2822 static void dump_snapshots(BlockDriverState *bs)
2823 {
2824     QEMUSnapshotInfo *sn_tab, *sn;
2825     int nb_sns, i;
2826 
2827     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2828     if (nb_sns <= 0)
2829         return;
2830     printf("Snapshot list:\n");
2831     bdrv_snapshot_dump(NULL);
2832     printf("\n");
2833     for(i = 0; i < nb_sns; i++) {
2834         sn = &sn_tab[i];
2835         bdrv_snapshot_dump(sn);
2836         printf("\n");
2837     }
2838     g_free(sn_tab);
2839 }
2840 
2841 static void dump_json_block_graph_info_list(BlockGraphInfoList *list)
2842 {
2843     GString *str;
2844     QObject *obj;
2845     Visitor *v = qobject_output_visitor_new(&obj);
2846 
2847     visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort);
2848     visit_complete(v, &obj);
2849     str = qobject_to_json_pretty(obj, true);
2850     assert(str != NULL);
2851     printf("%s\n", str->str);
2852     qobject_unref(obj);
2853     visit_free(v);
2854     g_string_free(str, true);
2855 }
2856 
2857 static void dump_json_block_graph_info(BlockGraphInfo *info)
2858 {
2859     GString *str;
2860     QObject *obj;
2861     Visitor *v = qobject_output_visitor_new(&obj);
2862 
2863     visit_type_BlockGraphInfo(v, NULL, &info, &error_abort);
2864     visit_complete(v, &obj);
2865     str = qobject_to_json_pretty(obj, true);
2866     assert(str != NULL);
2867     printf("%s\n", str->str);
2868     qobject_unref(obj);
2869     visit_free(v);
2870     g_string_free(str, true);
2871 }
2872 
2873 static void dump_human_image_info(BlockGraphInfo *info, int indentation,
2874                                   const char *path)
2875 {
2876     BlockChildInfoList *children_list;
2877 
2878     bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation,
2879                         info->children == NULL);
2880 
2881     for (children_list = info->children; children_list;
2882          children_list = children_list->next)
2883     {
2884         BlockChildInfo *child = children_list->value;
2885         g_autofree char *child_path = NULL;
2886 
2887         printf("%*sChild node '%s%s':\n",
2888                indentation * 4, "", path, child->name);
2889         child_path = g_strdup_printf("%s%s/", path, child->name);
2890         dump_human_image_info(child->info, indentation + 1, child_path);
2891     }
2892 }
2893 
2894 static void dump_human_image_info_list(BlockGraphInfoList *list)
2895 {
2896     BlockGraphInfoList *elem;
2897     bool delim = false;
2898 
2899     for (elem = list; elem; elem = elem->next) {
2900         if (delim) {
2901             printf("\n");
2902         }
2903         delim = true;
2904 
2905         dump_human_image_info(elem->value, 0, "/");
2906     }
2907 }
2908 
2909 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2910 {
2911     return strcmp(a, b) == 0;
2912 }
2913 
2914 /**
2915  * Open an image file chain and return an BlockGraphInfoList
2916  *
2917  * @filename: topmost image filename
2918  * @fmt: topmost image format (may be NULL to autodetect)
2919  * @chain: true  - enumerate entire backing file chain
2920  *         false - only topmost image file
2921  *
2922  * Returns a list of BlockNodeInfo objects or NULL if there was an error
2923  * opening an image file.  If there was an error a message will have been
2924  * printed to stderr.
2925  */
2926 static BlockGraphInfoList *collect_image_info_list(bool image_opts,
2927                                                    const char *filename,
2928                                                    const char *fmt,
2929                                                    bool chain, bool force_share)
2930 {
2931     BlockGraphInfoList *head = NULL;
2932     BlockGraphInfoList **tail = &head;
2933     GHashTable *filenames;
2934     Error *err = NULL;
2935 
2936     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
2937 
2938     while (filename) {
2939         BlockBackend *blk;
2940         BlockDriverState *bs;
2941         BlockGraphInfo *info;
2942 
2943         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
2944             error_report("Backing file '%s' creates an infinite loop.",
2945                          filename);
2946             goto err;
2947         }
2948         g_hash_table_insert(filenames, (gpointer)filename, NULL);
2949 
2950         blk = img_open(image_opts, filename, fmt,
2951                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
2952                        force_share);
2953         if (!blk) {
2954             goto err;
2955         }
2956         bs = blk_bs(blk);
2957 
2958         /*
2959          * Note that the returned BlockGraphInfo object will not have
2960          * information about this image's backing node, because we have opened
2961          * it with BDRV_O_NO_BACKING.  Printing this object will therefore not
2962          * duplicate the backing chain information that we obtain by walking
2963          * the chain manually here.
2964          */
2965         bdrv_graph_rdlock_main_loop();
2966         bdrv_query_block_graph_info(bs, &info, &err);
2967         bdrv_graph_rdunlock_main_loop();
2968 
2969         if (err) {
2970             error_report_err(err);
2971             blk_unref(blk);
2972             goto err;
2973         }
2974 
2975         QAPI_LIST_APPEND(tail, info);
2976 
2977         blk_unref(blk);
2978 
2979         /* Clear parameters that only apply to the topmost image */
2980         filename = fmt = NULL;
2981         image_opts = false;
2982 
2983         if (chain) {
2984             if (info->full_backing_filename) {
2985                 filename = info->full_backing_filename;
2986             } else if (info->backing_filename) {
2987                 error_report("Could not determine absolute backing filename,"
2988                              " but backing filename '%s' present",
2989                              info->backing_filename);
2990                 goto err;
2991             }
2992             if (info->backing_filename_format) {
2993                 fmt = info->backing_filename_format;
2994             }
2995         }
2996     }
2997     g_hash_table_destroy(filenames);
2998     return head;
2999 
3000 err:
3001     qapi_free_BlockGraphInfoList(head);
3002     g_hash_table_destroy(filenames);
3003     return NULL;
3004 }
3005 
3006 static int img_info(int argc, char **argv)
3007 {
3008     int c;
3009     OutputFormat output_format = OFORMAT_HUMAN;
3010     bool chain = false;
3011     const char *filename, *fmt, *output;
3012     BlockGraphInfoList *list;
3013     bool image_opts = false;
3014     bool force_share = false;
3015 
3016     fmt = NULL;
3017     output = NULL;
3018     for(;;) {
3019         int option_index = 0;
3020         static const struct option long_options[] = {
3021             {"help", no_argument, 0, 'h'},
3022             {"format", required_argument, 0, 'f'},
3023             {"output", required_argument, 0, OPTION_OUTPUT},
3024             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
3025             {"object", required_argument, 0, OPTION_OBJECT},
3026             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3027             {"force-share", no_argument, 0, 'U'},
3028             {0, 0, 0, 0}
3029         };
3030         c = getopt_long(argc, argv, ":f:hU",
3031                         long_options, &option_index);
3032         if (c == -1) {
3033             break;
3034         }
3035         switch(c) {
3036         case ':':
3037             missing_argument(argv[optind - 1]);
3038             break;
3039         case '?':
3040             unrecognized_option(argv[optind - 1]);
3041             break;
3042         case 'h':
3043             help();
3044             break;
3045         case 'f':
3046             fmt = optarg;
3047             break;
3048         case 'U':
3049             force_share = true;
3050             break;
3051         case OPTION_OUTPUT:
3052             output = optarg;
3053             break;
3054         case OPTION_BACKING_CHAIN:
3055             chain = true;
3056             break;
3057         case OPTION_OBJECT:
3058             user_creatable_process_cmdline(optarg);
3059             break;
3060         case OPTION_IMAGE_OPTS:
3061             image_opts = true;
3062             break;
3063         }
3064     }
3065     if (optind != argc - 1) {
3066         error_exit("Expecting one image file name");
3067     }
3068     filename = argv[optind++];
3069 
3070     if (output && !strcmp(output, "json")) {
3071         output_format = OFORMAT_JSON;
3072     } else if (output && !strcmp(output, "human")) {
3073         output_format = OFORMAT_HUMAN;
3074     } else if (output) {
3075         error_report("--output must be used with human or json as argument.");
3076         return 1;
3077     }
3078 
3079     list = collect_image_info_list(image_opts, filename, fmt, chain,
3080                                    force_share);
3081     if (!list) {
3082         return 1;
3083     }
3084 
3085     switch (output_format) {
3086     case OFORMAT_HUMAN:
3087         dump_human_image_info_list(list);
3088         break;
3089     case OFORMAT_JSON:
3090         if (chain) {
3091             dump_json_block_graph_info_list(list);
3092         } else {
3093             dump_json_block_graph_info(list->value);
3094         }
3095         break;
3096     }
3097 
3098     qapi_free_BlockGraphInfoList(list);
3099     return 0;
3100 }
3101 
3102 static int dump_map_entry(OutputFormat output_format, MapEntry *e,
3103                           MapEntry *next)
3104 {
3105     switch (output_format) {
3106     case OFORMAT_HUMAN:
3107         if (e->data && !e->has_offset) {
3108             error_report("File contains external, encrypted or compressed clusters.");
3109             return -1;
3110         }
3111         if (e->data && !e->zero) {
3112             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
3113                    e->start, e->length,
3114                    e->has_offset ? e->offset : 0,
3115                    e->filename ?: "");
3116         }
3117         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
3118          * Modify the flags here to allow more coalescing.
3119          */
3120         if (next && (!next->data || next->zero)) {
3121             next->data = false;
3122             next->zero = true;
3123         }
3124         break;
3125     case OFORMAT_JSON:
3126         printf("{ \"start\": %"PRId64", \"length\": %"PRId64","
3127                " \"depth\": %"PRId64", \"present\": %s, \"zero\": %s,"
3128                " \"data\": %s, \"compressed\": %s",
3129                e->start, e->length, e->depth,
3130                e->present ? "true" : "false",
3131                e->zero ? "true" : "false",
3132                e->data ? "true" : "false",
3133                e->compressed ? "true" : "false");
3134         if (e->has_offset) {
3135             printf(", \"offset\": %"PRId64"", e->offset);
3136         }
3137         putchar('}');
3138 
3139         if (next) {
3140             puts(",");
3141         }
3142         break;
3143     }
3144     return 0;
3145 }
3146 
3147 static int get_block_status(BlockDriverState *bs, int64_t offset,
3148                             int64_t bytes, MapEntry *e)
3149 {
3150     int ret;
3151     int depth;
3152     BlockDriverState *file;
3153     bool has_offset;
3154     int64_t map;
3155     char *filename = NULL;
3156 
3157     GLOBAL_STATE_CODE();
3158     GRAPH_RDLOCK_GUARD_MAINLOOP();
3159 
3160     /* As an optimization, we could cache the current range of unallocated
3161      * clusters in each file of the chain, and avoid querying the same
3162      * range repeatedly.
3163      */
3164 
3165     depth = 0;
3166     for (;;) {
3167         bs = bdrv_skip_filters(bs);
3168         ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
3169         if (ret < 0) {
3170             return ret;
3171         }
3172         assert(bytes);
3173         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
3174             break;
3175         }
3176         bs = bdrv_cow_bs(bs);
3177         if (bs == NULL) {
3178             ret = 0;
3179             break;
3180         }
3181 
3182         depth++;
3183     }
3184 
3185     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
3186 
3187     if (file && has_offset) {
3188         bdrv_refresh_filename(file);
3189         filename = file->filename;
3190     }
3191 
3192     *e = (MapEntry) {
3193         .start = offset,
3194         .length = bytes,
3195         .data = !!(ret & BDRV_BLOCK_DATA),
3196         .zero = !!(ret & BDRV_BLOCK_ZERO),
3197         .compressed = !!(ret & BDRV_BLOCK_COMPRESSED),
3198         .offset = map,
3199         .has_offset = has_offset,
3200         .depth = depth,
3201         .present = !!(ret & BDRV_BLOCK_ALLOCATED),
3202         .filename = filename,
3203     };
3204 
3205     return 0;
3206 }
3207 
3208 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
3209 {
3210     if (curr->length == 0) {
3211         return false;
3212     }
3213     if (curr->zero != next->zero ||
3214         curr->data != next->data ||
3215         curr->compressed != next->compressed ||
3216         curr->depth != next->depth ||
3217         curr->present != next->present ||
3218         !curr->filename != !next->filename ||
3219         curr->has_offset != next->has_offset) {
3220         return false;
3221     }
3222     if (curr->filename && strcmp(curr->filename, next->filename)) {
3223         return false;
3224     }
3225     if (curr->has_offset && curr->offset + curr->length != next->offset) {
3226         return false;
3227     }
3228     return true;
3229 }
3230 
3231 static int img_map(int argc, char **argv)
3232 {
3233     int c;
3234     OutputFormat output_format = OFORMAT_HUMAN;
3235     BlockBackend *blk;
3236     BlockDriverState *bs;
3237     const char *filename, *fmt, *output;
3238     int64_t length;
3239     MapEntry curr = { .length = 0 }, next;
3240     int ret = 0;
3241     bool image_opts = false;
3242     bool force_share = false;
3243     int64_t start_offset = 0;
3244     int64_t max_length = -1;
3245 
3246     fmt = NULL;
3247     output = NULL;
3248     for (;;) {
3249         int option_index = 0;
3250         static const struct option long_options[] = {
3251             {"help", no_argument, 0, 'h'},
3252             {"format", required_argument, 0, 'f'},
3253             {"output", required_argument, 0, OPTION_OUTPUT},
3254             {"object", required_argument, 0, OPTION_OBJECT},
3255             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3256             {"force-share", no_argument, 0, 'U'},
3257             {"start-offset", required_argument, 0, 's'},
3258             {"max-length", required_argument, 0, 'l'},
3259             {0, 0, 0, 0}
3260         };
3261         c = getopt_long(argc, argv, ":f:s:l:hU",
3262                         long_options, &option_index);
3263         if (c == -1) {
3264             break;
3265         }
3266         switch (c) {
3267         case ':':
3268             missing_argument(argv[optind - 1]);
3269             break;
3270         case '?':
3271             unrecognized_option(argv[optind - 1]);
3272             break;
3273         case 'h':
3274             help();
3275             break;
3276         case 'f':
3277             fmt = optarg;
3278             break;
3279         case 'U':
3280             force_share = true;
3281             break;
3282         case OPTION_OUTPUT:
3283             output = optarg;
3284             break;
3285         case 's':
3286             start_offset = cvtnum("start offset", optarg);
3287             if (start_offset < 0) {
3288                 return 1;
3289             }
3290             break;
3291         case 'l':
3292             max_length = cvtnum("max length", optarg);
3293             if (max_length < 0) {
3294                 return 1;
3295             }
3296             break;
3297         case OPTION_OBJECT:
3298             user_creatable_process_cmdline(optarg);
3299             break;
3300         case OPTION_IMAGE_OPTS:
3301             image_opts = true;
3302             break;
3303         }
3304     }
3305     if (optind != argc - 1) {
3306         error_exit("Expecting one image file name");
3307     }
3308     filename = argv[optind];
3309 
3310     if (output && !strcmp(output, "json")) {
3311         output_format = OFORMAT_JSON;
3312     } else if (output && !strcmp(output, "human")) {
3313         output_format = OFORMAT_HUMAN;
3314     } else if (output) {
3315         error_report("--output must be used with human or json as argument.");
3316         return 1;
3317     }
3318 
3319     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3320     if (!blk) {
3321         return 1;
3322     }
3323     bs = blk_bs(blk);
3324 
3325     if (output_format == OFORMAT_HUMAN) {
3326         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3327     } else if (output_format == OFORMAT_JSON) {
3328         putchar('[');
3329     }
3330 
3331     length = blk_getlength(blk);
3332     if (length < 0) {
3333         error_report("Failed to get size for '%s'", filename);
3334         return 1;
3335     }
3336     if (max_length != -1) {
3337         length = MIN(start_offset + max_length, length);
3338     }
3339 
3340     curr.start = start_offset;
3341     while (curr.start + curr.length < length) {
3342         int64_t offset = curr.start + curr.length;
3343         int64_t n = length - offset;
3344 
3345         ret = get_block_status(bs, offset, n, &next);
3346         if (ret < 0) {
3347             error_report("Could not read file metadata: %s", strerror(-ret));
3348             goto out;
3349         }
3350 
3351         if (entry_mergeable(&curr, &next)) {
3352             curr.length += next.length;
3353             continue;
3354         }
3355 
3356         if (curr.length > 0) {
3357             ret = dump_map_entry(output_format, &curr, &next);
3358             if (ret < 0) {
3359                 goto out;
3360             }
3361         }
3362         curr = next;
3363     }
3364 
3365     ret = dump_map_entry(output_format, &curr, NULL);
3366     if (output_format == OFORMAT_JSON) {
3367         puts("]");
3368     }
3369 
3370 out:
3371     blk_unref(blk);
3372     return ret < 0;
3373 }
3374 
/* Action codes for the "qemu-img snapshot" subcommand. */
enum {
    SNAPSHOT_LIST   = 1,
    SNAPSHOT_CREATE = 2,
    SNAPSHOT_APPLY  = 3,
    SNAPSHOT_DELETE = 4,
};
3379 
3380 static int img_snapshot(int argc, char **argv)
3381 {
3382     BlockBackend *blk;
3383     BlockDriverState *bs;
3384     QEMUSnapshotInfo sn;
3385     char *filename, *snapshot_name = NULL;
3386     int c, ret = 0, bdrv_oflags;
3387     int action = 0;
3388     bool quiet = false;
3389     Error *err = NULL;
3390     bool image_opts = false;
3391     bool force_share = false;
3392     int64_t rt;
3393 
3394     bdrv_oflags = BDRV_O_RDWR;
3395     /* Parse commandline parameters */
3396     for(;;) {
3397         static const struct option long_options[] = {
3398             {"help", no_argument, 0, 'h'},
3399             {"object", required_argument, 0, OPTION_OBJECT},
3400             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3401             {"force-share", no_argument, 0, 'U'},
3402             {0, 0, 0, 0}
3403         };
3404         c = getopt_long(argc, argv, ":la:c:d:hqU",
3405                         long_options, NULL);
3406         if (c == -1) {
3407             break;
3408         }
3409         switch(c) {
3410         case ':':
3411             missing_argument(argv[optind - 1]);
3412             break;
3413         case '?':
3414             unrecognized_option(argv[optind - 1]);
3415             break;
3416         case 'h':
3417             help();
3418             return 0;
3419         case 'l':
3420             if (action) {
3421                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3422                 return 0;
3423             }
3424             action = SNAPSHOT_LIST;
3425             bdrv_oflags &= ~BDRV_O_RDWR; /* no need for RW */
3426             break;
3427         case 'a':
3428             if (action) {
3429                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3430                 return 0;
3431             }
3432             action = SNAPSHOT_APPLY;
3433             snapshot_name = optarg;
3434             break;
3435         case 'c':
3436             if (action) {
3437                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3438                 return 0;
3439             }
3440             action = SNAPSHOT_CREATE;
3441             snapshot_name = optarg;
3442             break;
3443         case 'd':
3444             if (action) {
3445                 error_exit("Cannot mix '-l', '-a', '-c', '-d'");
3446                 return 0;
3447             }
3448             action = SNAPSHOT_DELETE;
3449             snapshot_name = optarg;
3450             break;
3451         case 'q':
3452             quiet = true;
3453             break;
3454         case 'U':
3455             force_share = true;
3456             break;
3457         case OPTION_OBJECT:
3458             user_creatable_process_cmdline(optarg);
3459             break;
3460         case OPTION_IMAGE_OPTS:
3461             image_opts = true;
3462             break;
3463         }
3464     }
3465 
3466     if (optind != argc - 1) {
3467         error_exit("Expecting one image file name");
3468     }
3469     filename = argv[optind++];
3470 
3471     /* Open the image */
3472     blk = img_open(image_opts, filename, NULL, bdrv_oflags, false, quiet,
3473                    force_share);
3474     if (!blk) {
3475         return 1;
3476     }
3477     bs = blk_bs(blk);
3478 
3479     /* Perform the requested action */
3480     switch(action) {
3481     case SNAPSHOT_LIST:
3482         dump_snapshots(bs);
3483         break;
3484 
3485     case SNAPSHOT_CREATE:
3486         memset(&sn, 0, sizeof(sn));
3487         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3488 
3489         rt = g_get_real_time();
3490         sn.date_sec = rt / G_USEC_PER_SEC;
3491         sn.date_nsec = (rt % G_USEC_PER_SEC) * 1000;
3492 
3493         bdrv_graph_rdlock_main_loop();
3494         ret = bdrv_snapshot_create(bs, &sn);
3495         bdrv_graph_rdunlock_main_loop();
3496 
3497         if (ret) {
3498             error_report("Could not create snapshot '%s': %s",
3499                 snapshot_name, strerror(-ret));
3500         }
3501         break;
3502 
3503     case SNAPSHOT_APPLY:
3504         ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3505         if (ret) {
3506             error_reportf_err(err, "Could not apply snapshot '%s': ",
3507                               snapshot_name);
3508         }
3509         break;
3510 
3511     case SNAPSHOT_DELETE:
3512         bdrv_graph_rdlock_main_loop();
3513         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3514         if (ret < 0) {
3515             error_report("Could not delete snapshot '%s': snapshot not "
3516                          "found", snapshot_name);
3517             ret = 1;
3518         } else {
3519             ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3520             if (ret < 0) {
3521                 error_reportf_err(err, "Could not delete snapshot '%s': ",
3522                                   snapshot_name);
3523                 ret = 1;
3524             }
3525         }
3526         bdrv_graph_rdunlock_main_loop();
3527         break;
3528     }
3529 
3530     /* Cleanup */
3531     blk_unref(blk);
3532     if (ret) {
3533         return 1;
3534     }
3535     return 0;
3536 }
3537 
3538 static int img_rebase(int argc, char **argv)
3539 {
3540     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3541     uint8_t *buf_old = NULL;
3542     uint8_t *buf_new = NULL;
3543     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3544     BlockDriverState *unfiltered_bs, *unfiltered_bs_cow;
3545     BlockDriverInfo bdi = {0};
3546     char *filename;
3547     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3548     int c, flags, src_flags, ret;
3549     BdrvRequestFlags write_flags = 0;
3550     bool writethrough, src_writethrough;
3551     int unsafe = 0;
3552     bool force_share = false;
3553     int progress = 0;
3554     bool quiet = false;
3555     bool compress = false;
3556     Error *local_err = NULL;
3557     bool image_opts = false;
3558     int64_t write_align;
3559 
3560     /* Parse commandline parameters */
3561     fmt = NULL;
3562     cache = BDRV_DEFAULT_CACHE;
3563     src_cache = BDRV_DEFAULT_CACHE;
3564     out_baseimg = NULL;
3565     out_basefmt = NULL;
3566     for(;;) {
3567         static const struct option long_options[] = {
3568             {"help", no_argument, 0, 'h'},
3569             {"object", required_argument, 0, OPTION_OBJECT},
3570             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3571             {"force-share", no_argument, 0, 'U'},
3572             {"compress", no_argument, 0, 'c'},
3573             {0, 0, 0, 0}
3574         };
3575         c = getopt_long(argc, argv, ":hf:F:b:upt:T:qUc",
3576                         long_options, NULL);
3577         if (c == -1) {
3578             break;
3579         }
3580         switch(c) {
3581         case ':':
3582             missing_argument(argv[optind - 1]);
3583             break;
3584         case '?':
3585             unrecognized_option(argv[optind - 1]);
3586             break;
3587         case 'h':
3588             help();
3589             return 0;
3590         case 'f':
3591             fmt = optarg;
3592             break;
3593         case 'F':
3594             out_basefmt = optarg;
3595             break;
3596         case 'b':
3597             out_baseimg = optarg;
3598             break;
3599         case 'u':
3600             unsafe = 1;
3601             break;
3602         case 'p':
3603             progress = 1;
3604             break;
3605         case 't':
3606             cache = optarg;
3607             break;
3608         case 'T':
3609             src_cache = optarg;
3610             break;
3611         case 'q':
3612             quiet = true;
3613             break;
3614         case OPTION_OBJECT:
3615             user_creatable_process_cmdline(optarg);
3616             break;
3617         case OPTION_IMAGE_OPTS:
3618             image_opts = true;
3619             break;
3620         case 'U':
3621             force_share = true;
3622             break;
3623         case 'c':
3624             compress = true;
3625             break;
3626         }
3627     }
3628 
3629     if (quiet) {
3630         progress = 0;
3631     }
3632 
3633     if (optind != argc - 1) {
3634         error_exit("Expecting one image file name");
3635     }
3636     if (!unsafe && !out_baseimg) {
3637         error_exit("Must specify backing file (-b) or use unsafe mode (-u)");
3638     }
3639     filename = argv[optind++];
3640 
3641     qemu_progress_init(progress, 2.0);
3642     qemu_progress_print(0, 100);
3643 
3644     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3645     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3646     if (ret < 0) {
3647         error_report("Invalid cache option: %s", cache);
3648         goto out;
3649     }
3650 
3651     src_flags = 0;
3652     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3653     if (ret < 0) {
3654         error_report("Invalid source cache option: %s", src_cache);
3655         goto out;
3656     }
3657 
3658     /* The source files are opened read-only, don't care about WCE */
3659     assert((src_flags & BDRV_O_RDWR) == 0);
3660     (void) src_writethrough;
3661 
3662     /*
3663      * Open the images.
3664      *
3665      * Ignore the old backing file for unsafe rebase in case we want to correct
3666      * the reference to a renamed or moved backing file.
3667      */
3668     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3669                    false);
3670     if (!blk) {
3671         ret = -1;
3672         goto out;
3673     }
3674     bs = blk_bs(blk);
3675 
3676     bdrv_graph_rdlock_main_loop();
3677     unfiltered_bs = bdrv_skip_filters(bs);
3678     unfiltered_bs_cow = bdrv_cow_bs(unfiltered_bs);
3679     bdrv_graph_rdunlock_main_loop();
3680 
3681     if (compress && !block_driver_can_compress(unfiltered_bs->drv)) {
3682         error_report("Compression not supported for this file format");
3683         ret = -1;
3684         goto out;
3685     } else if (compress) {
3686         write_flags |= BDRV_REQ_WRITE_COMPRESSED;
3687     }
3688 
3689     if (out_basefmt != NULL) {
3690         if (bdrv_find_format(out_basefmt) == NULL) {
3691             error_report("Invalid format name: '%s'", out_basefmt);
3692             ret = -1;
3693             goto out;
3694         }
3695     }
3696 
3697     /*
3698      * We need overlay subcluster size (or cluster size in case writes are
3699      * compressed) to make sure write requests are aligned.
3700      */
3701     ret = bdrv_get_info(unfiltered_bs, &bdi);
3702     if (ret < 0) {
3703         error_report("could not get block driver info");
3704         goto out;
3705     } else if (bdi.subcluster_size == 0) {
3706         bdi.cluster_size = bdi.subcluster_size = 1;
3707     }
3708 
3709     write_align = compress ? bdi.cluster_size : bdi.subcluster_size;
3710 
3711     /* For safe rebasing we need to compare old and new backing file */
3712     if (!unsafe) {
3713         QDict *options = NULL;
3714         BlockDriverState *base_bs;
3715 
3716         bdrv_graph_rdlock_main_loop();
3717         base_bs = bdrv_cow_bs(unfiltered_bs);
3718         bdrv_graph_rdunlock_main_loop();
3719 
3720         if (base_bs) {
3721             blk_old_backing = blk_new(qemu_get_aio_context(),
3722                                       BLK_PERM_CONSISTENT_READ,
3723                                       BLK_PERM_ALL);
3724             ret = blk_insert_bs(blk_old_backing, base_bs,
3725                                 &local_err);
3726             if (ret < 0) {
3727                 error_reportf_err(local_err,
3728                                   "Could not reuse old backing file '%s': ",
3729                                   base_bs->filename);
3730                 goto out;
3731             }
3732         } else {
3733             blk_old_backing = NULL;
3734         }
3735 
3736         if (out_baseimg[0]) {
3737             const char *overlay_filename;
3738             char *out_real_path;
3739 
3740             options = qdict_new();
3741             if (out_basefmt) {
3742                 qdict_put_str(options, "driver", out_basefmt);
3743             }
3744             if (force_share) {
3745                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3746             }
3747 
3748             bdrv_graph_rdlock_main_loop();
3749             bdrv_refresh_filename(bs);
3750             bdrv_graph_rdunlock_main_loop();
3751             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3752                                                      : bs->filename;
3753             out_real_path =
3754                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
3755                                                              out_baseimg,
3756                                                              &local_err);
3757             if (local_err) {
3758                 qobject_unref(options);
3759                 error_reportf_err(local_err,
3760                                   "Could not resolve backing filename: ");
3761                 ret = -1;
3762                 goto out;
3763             }
3764 
3765             /*
3766              * Find out whether we rebase an image on top of a previous image
3767              * in its chain.
3768              */
3769             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3770             if (prefix_chain_bs) {
3771                 qobject_unref(options);
3772                 g_free(out_real_path);
3773 
3774                 blk_new_backing = blk_new(qemu_get_aio_context(),
3775                                           BLK_PERM_CONSISTENT_READ,
3776                                           BLK_PERM_ALL);
3777                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3778                                     &local_err);
3779                 if (ret < 0) {
3780                     error_reportf_err(local_err,
3781                                       "Could not reuse backing file '%s': ",
3782                                       out_baseimg);
3783                     goto out;
3784                 }
3785             } else {
3786                 blk_new_backing = blk_new_open(out_real_path, NULL,
3787                                                options, src_flags, &local_err);
3788                 g_free(out_real_path);
3789                 if (!blk_new_backing) {
3790                     error_reportf_err(local_err,
3791                                       "Could not open new backing file '%s': ",
3792                                       out_baseimg);
3793                     ret = -1;
3794                     goto out;
3795                 }
3796             }
3797         }
3798     }
3799 
3800     /*
3801      * Check each unallocated cluster in the COW file. If it is unallocated,
3802      * accesses go to the backing file. We must therefore compare this cluster
3803      * in the old and new backing file, and if they differ we need to copy it
3804      * from the old backing file into the COW file.
3805      *
3806      * If qemu-img crashes during this step, no harm is done. The content of
3807      * the image is the same as the original one at any time.
3808      */
3809     if (!unsafe) {
3810         int64_t size;
3811         int64_t old_backing_size = 0;
3812         int64_t new_backing_size = 0;
3813         uint64_t offset;
3814         int64_t n, n_old = 0, n_new = 0;
3815         float local_progress = 0;
3816 
3817         if (blk_old_backing && bdrv_opt_mem_align(blk_bs(blk_old_backing)) >
3818             bdrv_opt_mem_align(blk_bs(blk))) {
3819             buf_old = blk_blockalign(blk_old_backing, IO_BUF_SIZE);
3820         } else {
3821             buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3822         }
3823         buf_new = blk_blockalign(blk_new_backing, IO_BUF_SIZE);
3824 
3825         size = blk_getlength(blk);
3826         if (size < 0) {
3827             error_report("Could not get size of '%s': %s",
3828                          filename, strerror(-size));
3829             ret = -1;
3830             goto out;
3831         }
3832         if (blk_old_backing) {
3833             old_backing_size = blk_getlength(blk_old_backing);
3834             if (old_backing_size < 0) {
3835                 char backing_name[PATH_MAX];
3836 
3837                 bdrv_get_backing_filename(bs, backing_name,
3838                                           sizeof(backing_name));
3839                 error_report("Could not get size of '%s': %s",
3840                              backing_name, strerror(-old_backing_size));
3841                 ret = -1;
3842                 goto out;
3843             }
3844         }
3845         if (blk_new_backing) {
3846             new_backing_size = blk_getlength(blk_new_backing);
3847             if (new_backing_size < 0) {
3848                 error_report("Could not get size of '%s': %s",
3849                              out_baseimg, strerror(-new_backing_size));
3850                 ret = -1;
3851                 goto out;
3852             }
3853         }
3854 
3855         if (size != 0) {
3856             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3857         }
3858 
3859         for (offset = 0; offset < size; offset += n) {
3860             bool old_backing_eof = false;
3861             int64_t n_alloc;
3862 
3863             /* How many bytes can we handle with the next read? */
3864             n = MIN(IO_BUF_SIZE, size - offset);
3865 
3866             /* If the cluster is allocated, we don't need to take action */
3867             ret = bdrv_is_allocated(unfiltered_bs, offset, n, &n);
3868             if (ret < 0) {
3869                 error_report("error while reading image metadata: %s",
3870                              strerror(-ret));
3871                 goto out;
3872             }
3873             if (ret) {
3874                 continue;
3875             }
3876 
3877             if (prefix_chain_bs) {
3878                 uint64_t bytes = n;
3879 
3880                 /*
3881                  * If cluster wasn't changed since prefix_chain, we don't need
3882                  * to take action
3883                  */
3884                 ret = bdrv_is_allocated_above(unfiltered_bs_cow,
3885                                               prefix_chain_bs, false,
3886                                               offset, n, &n);
3887                 if (ret < 0) {
3888                     error_report("error while reading image metadata: %s",
3889                                  strerror(-ret));
3890                     goto out;
3891                 }
3892                 if (!ret && n) {
3893                     continue;
3894                 }
3895                 if (!n) {
3896                     /*
3897                      * If we've reached EOF of the old backing, it means that
3898                      * offsets beyond the old backing size were read as zeroes.
3899                      * Now we will need to explicitly zero the cluster in
3900                      * order to preserve that state after the rebase.
3901                      */
3902                     n = bytes;
3903                 }
3904             }
3905 
3906             /*
3907              * At this point we know that the region [offset; offset + n)
3908              * is unallocated within the target image.  This region might be
3909              * unaligned to the target image's (sub)cluster boundaries, as
3910              * old backing may have smaller clusters (or have subclusters).
3911              * We extend it to the aligned boundaries to avoid CoW on
3912              * partial writes in blk_pwrite(),
3913              */
3914             n += offset - QEMU_ALIGN_DOWN(offset, write_align);
3915             offset = QEMU_ALIGN_DOWN(offset, write_align);
3916             n += QEMU_ALIGN_UP(offset + n, write_align) - (offset + n);
3917             n = MIN(n, size - offset);
3918             assert(!bdrv_is_allocated(unfiltered_bs, offset, n, &n_alloc) &&
3919                    n_alloc == n);
3920 
3921             /*
3922              * Much like with the target image, we'll try to read as much
3923              * of the old and new backings as we can.
3924              */
3925             n_old = MIN(n, MAX(0, old_backing_size - (int64_t) offset));
3926             n_new = MIN(n, MAX(0, new_backing_size - (int64_t) offset));
3927 
3928             /*
3929              * Read old and new backing file and take into consideration that
3930              * backing files may be smaller than the COW image.
3931              */
3932             memset(buf_old + n_old, 0, n - n_old);
3933             if (!n_old) {
3934                 old_backing_eof = true;
3935             } else {
3936                 ret = blk_pread(blk_old_backing, offset, n_old, buf_old, 0);
3937                 if (ret < 0) {
3938                     error_report("error while reading from old backing file");
3939                     goto out;
3940                 }
3941             }
3942 
3943             memset(buf_new + n_new, 0, n - n_new);
3944             if (n_new) {
3945                 ret = blk_pread(blk_new_backing, offset, n_new, buf_new, 0);
3946                 if (ret < 0) {
3947                     error_report("error while reading from new backing file");
3948                     goto out;
3949                 }
3950             }
3951 
3952             /* If they differ, we need to write to the COW file */
3953             uint64_t written = 0;
3954 
3955             while (written < n) {
3956                 int64_t pnum;
3957 
3958                 if (compare_buffers(buf_old + written, buf_new + written,
3959                                     n - written, write_align, &pnum))
3960                 {
3961                     if (old_backing_eof) {
3962                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
3963                     } else {
3964                         assert(written + pnum <= IO_BUF_SIZE);
3965                         ret = blk_pwrite(blk, offset + written, pnum,
3966                                          buf_old + written, write_flags);
3967                     }
3968                     if (ret < 0) {
3969                         error_report("Error while writing to COW image: %s",
3970                             strerror(-ret));
3971                         goto out;
3972                     }
3973                 }
3974 
3975                 written += pnum;
3976                 if (offset + written >= old_backing_size) {
3977                     old_backing_eof = true;
3978                 }
3979             }
3980             qemu_progress_print(local_progress, 100);
3981         }
3982     }
3983 
3984     /*
3985      * Change the backing file. All clusters that are different from the old
3986      * backing file are overwritten in the COW file now, so the visible content
3987      * doesn't change when we switch the backing file.
3988      */
3989     if (out_baseimg && *out_baseimg) {
3990         ret = bdrv_change_backing_file(unfiltered_bs, out_baseimg, out_basefmt,
3991                                        true);
3992     } else {
3993         ret = bdrv_change_backing_file(unfiltered_bs, NULL, NULL, false);
3994     }
3995 
3996     if (ret == -ENOSPC) {
3997         error_report("Could not change the backing file to '%s': No "
3998                      "space left in the file header", out_baseimg);
3999     } else if (ret == -EINVAL && out_baseimg && !out_basefmt) {
4000         error_report("Could not change the backing file to '%s': backing "
4001                      "format must be specified", out_baseimg);
4002     } else if (ret < 0) {
4003         error_report("Could not change the backing file to '%s': %s",
4004             out_baseimg, strerror(-ret));
4005     }
4006 
4007     qemu_progress_print(100, 0);
4008     /*
4009      * TODO At this point it is possible to check if any clusters that are
4010      * allocated in the COW file are the same in the backing file. If so, they
4011      * could be dropped from the COW file. Don't do this before switching the
4012      * backing file, in case of a crash this would lead to corruption.
4013      */
4014 out:
4015     qemu_progress_end();
4016     /* Cleanup */
4017     if (!unsafe) {
4018         blk_unref(blk_old_backing);
4019         blk_unref(blk_new_backing);
4020     }
4021     qemu_vfree(buf_old);
4022     qemu_vfree(buf_new);
4023 
4024     blk_unref(blk);
4025     if (ret) {
4026         return 1;
4027     }
4028     return 0;
4029 }
4030 
4031 static int img_resize(int argc, char **argv)
4032 {
4033     Error *err = NULL;
4034     int c, ret, relative;
4035     const char *filename, *fmt, *size;
4036     int64_t n, total_size, current_size;
4037     bool quiet = false;
4038     BlockBackend *blk = NULL;
4039     PreallocMode prealloc = PREALLOC_MODE_OFF;
4040     QemuOpts *param;
4041 
4042     static QemuOptsList resize_options = {
4043         .name = "resize_options",
4044         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
4045         .desc = {
4046             {
4047                 .name = BLOCK_OPT_SIZE,
4048                 .type = QEMU_OPT_SIZE,
4049                 .help = "Virtual disk size"
4050             }, {
4051                 /* end of list */
4052             }
4053         },
4054     };
4055     bool image_opts = false;
4056     bool shrink = false;
4057 
4058     /* Remove size from argv manually so that negative numbers are not treated
4059      * as options by getopt. */
4060     if (argc < 3) {
4061         error_exit("Not enough arguments");
4062         return 1;
4063     }
4064 
4065     size = argv[--argc];
4066 
4067     /* Parse getopt arguments */
4068     fmt = NULL;
4069     for(;;) {
4070         static const struct option long_options[] = {
4071             {"help", no_argument, 0, 'h'},
4072             {"object", required_argument, 0, OPTION_OBJECT},
4073             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4074             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
4075             {"shrink", no_argument, 0, OPTION_SHRINK},
4076             {0, 0, 0, 0}
4077         };
4078         c = getopt_long(argc, argv, ":f:hq",
4079                         long_options, NULL);
4080         if (c == -1) {
4081             break;
4082         }
4083         switch(c) {
4084         case ':':
4085             missing_argument(argv[optind - 1]);
4086             break;
4087         case '?':
4088             unrecognized_option(argv[optind - 1]);
4089             break;
4090         case 'h':
4091             help();
4092             break;
4093         case 'f':
4094             fmt = optarg;
4095             break;
4096         case 'q':
4097             quiet = true;
4098             break;
4099         case OPTION_OBJECT:
4100             user_creatable_process_cmdline(optarg);
4101             break;
4102         case OPTION_IMAGE_OPTS:
4103             image_opts = true;
4104             break;
4105         case OPTION_PREALLOCATION:
4106             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
4107                                        PREALLOC_MODE__MAX, NULL);
4108             if (prealloc == PREALLOC_MODE__MAX) {
4109                 error_report("Invalid preallocation mode '%s'", optarg);
4110                 return 1;
4111             }
4112             break;
4113         case OPTION_SHRINK:
4114             shrink = true;
4115             break;
4116         }
4117     }
4118     if (optind != argc - 1) {
4119         error_exit("Expecting image file name and size");
4120     }
4121     filename = argv[optind++];
4122 
4123     /* Choose grow, shrink, or absolute resize mode */
4124     switch (size[0]) {
4125     case '+':
4126         relative = 1;
4127         size++;
4128         break;
4129     case '-':
4130         relative = -1;
4131         size++;
4132         break;
4133     default:
4134         relative = 0;
4135         break;
4136     }
4137 
4138     /* Parse size */
4139     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
4140     if (!qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err)) {
4141         error_report_err(err);
4142         ret = -1;
4143         qemu_opts_del(param);
4144         goto out;
4145     }
4146     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
4147     qemu_opts_del(param);
4148 
4149     blk = img_open(image_opts, filename, fmt,
4150                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
4151                    false);
4152     if (!blk) {
4153         ret = -1;
4154         goto out;
4155     }
4156 
4157     current_size = blk_getlength(blk);
4158     if (current_size < 0) {
4159         error_report("Failed to inquire current image length: %s",
4160                      strerror(-current_size));
4161         ret = -1;
4162         goto out;
4163     }
4164 
4165     if (relative) {
4166         total_size = current_size + n * relative;
4167     } else {
4168         total_size = n;
4169     }
4170     if (total_size <= 0) {
4171         error_report("New image size must be positive");
4172         ret = -1;
4173         goto out;
4174     }
4175 
4176     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
4177         error_report("Preallocation can only be used for growing images");
4178         ret = -1;
4179         goto out;
4180     }
4181 
4182     if (total_size < current_size && !shrink) {
4183         error_report("Use the --shrink option to perform a shrink operation.");
4184         warn_report("Shrinking an image will delete all data beyond the "
4185                     "shrunken image's end. Before performing such an "
4186                     "operation, make sure there is no important data there.");
4187         ret = -1;
4188         goto out;
4189     }
4190 
4191     /*
4192      * The user expects the image to have the desired size after
4193      * resizing, so pass @exact=true.  It is of no use to report
4194      * success when the image has not actually been resized.
4195      */
4196     ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
4197     if (!ret) {
4198         qprintf(quiet, "Image resized.\n");
4199     } else {
4200         error_report_err(err);
4201     }
4202 out:
4203     blk_unref(blk);
4204     if (ret) {
4205         return 1;
4206     }
4207     return 0;
4208 }
4209 
4210 static void amend_status_cb(BlockDriverState *bs,
4211                             int64_t offset, int64_t total_work_size,
4212                             void *opaque)
4213 {
4214     qemu_progress_print(100.f * offset / total_work_size, 0);
4215 }
4216 
4217 static int print_amend_option_help(const char *format)
4218 {
4219     BlockDriver *drv;
4220 
4221     GRAPH_RDLOCK_GUARD_MAINLOOP();
4222 
4223     /* Find driver and parse its options */
4224     drv = bdrv_find_format(format);
4225     if (!drv) {
4226         error_report("Unknown file format '%s'", format);
4227         return 1;
4228     }
4229 
4230     if (!drv->bdrv_amend_options) {
4231         error_report("Format driver '%s' does not support option amendment",
4232                      format);
4233         return 1;
4234     }
4235 
4236     /* Every driver supporting amendment must have amend_opts */
4237     assert(drv->amend_opts);
4238 
4239     printf("Amend options for '%s':\n", format);
4240     qemu_opts_print_help(drv->amend_opts, false);
4241     return 0;
4242 }
4243 
4244 static int img_amend(int argc, char **argv)
4245 {
4246     Error *err = NULL;
4247     int c, ret = 0;
4248     char *options = NULL;
4249     QemuOptsList *amend_opts = NULL;
4250     QemuOpts *opts = NULL;
4251     const char *fmt = NULL, *filename, *cache;
4252     int flags;
4253     bool writethrough;
4254     bool quiet = false, progress = false;
4255     BlockBackend *blk = NULL;
4256     BlockDriverState *bs = NULL;
4257     bool image_opts = false;
4258     bool force = false;
4259 
4260     cache = BDRV_DEFAULT_CACHE;
4261     for (;;) {
4262         static const struct option long_options[] = {
4263             {"help", no_argument, 0, 'h'},
4264             {"object", required_argument, 0, OPTION_OBJECT},
4265             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4266             {"force", no_argument, 0, OPTION_FORCE},
4267             {0, 0, 0, 0}
4268         };
4269         c = getopt_long(argc, argv, ":ho:f:t:pq",
4270                         long_options, NULL);
4271         if (c == -1) {
4272             break;
4273         }
4274 
4275         switch (c) {
4276         case ':':
4277             missing_argument(argv[optind - 1]);
4278             break;
4279         case '?':
4280             unrecognized_option(argv[optind - 1]);
4281             break;
4282         case 'h':
4283             help();
4284             break;
4285         case 'o':
4286             if (accumulate_options(&options, optarg) < 0) {
4287                 ret = -1;
4288                 goto out_no_progress;
4289             }
4290             break;
4291         case 'f':
4292             fmt = optarg;
4293             break;
4294         case 't':
4295             cache = optarg;
4296             break;
4297         case 'p':
4298             progress = true;
4299             break;
4300         case 'q':
4301             quiet = true;
4302             break;
4303         case OPTION_OBJECT:
4304             user_creatable_process_cmdline(optarg);
4305             break;
4306         case OPTION_IMAGE_OPTS:
4307             image_opts = true;
4308             break;
4309         case OPTION_FORCE:
4310             force = true;
4311             break;
4312         }
4313     }
4314 
4315     if (!options) {
4316         error_exit("Must specify options (-o)");
4317     }
4318 
4319     if (quiet) {
4320         progress = false;
4321     }
4322     qemu_progress_init(progress, 1.0);
4323 
4324     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4325     if (fmt && has_help_option(options)) {
4326         /* If a format is explicitly specified (and possibly no filename is
4327          * given), print option help here */
4328         ret = print_amend_option_help(fmt);
4329         goto out;
4330     }
4331 
4332     if (optind != argc - 1) {
4333         error_report("Expecting one image file name");
4334         ret = -1;
4335         goto out;
4336     }
4337 
4338     flags = BDRV_O_RDWR;
4339     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4340     if (ret < 0) {
4341         error_report("Invalid cache option: %s", cache);
4342         goto out;
4343     }
4344 
4345     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4346                    false);
4347     if (!blk) {
4348         ret = -1;
4349         goto out;
4350     }
4351     bs = blk_bs(blk);
4352 
4353     fmt = bs->drv->format_name;
4354 
4355     if (has_help_option(options)) {
4356         /* If the format was auto-detected, print option help here */
4357         ret = print_amend_option_help(fmt);
4358         goto out;
4359     }
4360 
4361     bdrv_graph_rdlock_main_loop();
4362     if (!bs->drv->bdrv_amend_options) {
4363         error_report("Format driver '%s' does not support option amendment",
4364                      fmt);
4365         bdrv_graph_rdunlock_main_loop();
4366         ret = -1;
4367         goto out;
4368     }
4369 
4370     /* Every driver supporting amendment must have amend_opts */
4371     assert(bs->drv->amend_opts);
4372 
4373     amend_opts = qemu_opts_append(amend_opts, bs->drv->amend_opts);
4374     opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4375     if (!qemu_opts_do_parse(opts, options, NULL, &err)) {
4376         /* Try to parse options using the create options */
4377         amend_opts = qemu_opts_append(amend_opts, bs->drv->create_opts);
4378         qemu_opts_del(opts);
4379         opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4380         if (qemu_opts_do_parse(opts, options, NULL, NULL)) {
4381             error_append_hint(&err,
4382                               "This option is only supported for image creation\n");
4383         }
4384 
4385         bdrv_graph_rdunlock_main_loop();
4386         error_report_err(err);
4387         ret = -1;
4388         goto out;
4389     }
4390 
4391     /* In case the driver does not call amend_status_cb() */
4392     qemu_progress_print(0.f, 0);
4393     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, force, &err);
4394     qemu_progress_print(100.f, 0);
4395     bdrv_graph_rdunlock_main_loop();
4396 
4397     if (ret < 0) {
4398         error_report_err(err);
4399         goto out;
4400     }
4401 
4402 out:
4403     qemu_progress_end();
4404 
4405 out_no_progress:
4406     blk_unref(blk);
4407     qemu_opts_del(opts);
4408     qemu_opts_free(amend_opts);
4409     g_free(options);
4410 
4411     if (ret) {
4412         return 1;
4413     }
4414     return 0;
4415 }
4416 
4417 typedef struct BenchData {
4418     BlockBackend *blk;
4419     uint64_t image_size;
4420     bool write;
4421     int bufsize;
4422     int step;
4423     int nrreq;
4424     int n;
4425     int flush_interval;
4426     bool drain_on_flush;
4427     uint8_t *buf;
4428     QEMUIOVector *qiov;
4429 
4430     int in_flight;
4431     bool in_flush;
4432     uint64_t offset;
4433 } BenchData;
4434 
/*
 * Completion callback for flushes that bench_cb() issues without draining
 * the queue first.  A failed flush ends the process; a successful one needs
 * no further action.  @opaque is not used.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4442 
4443 static void bench_cb(void *opaque, int ret)
4444 {
4445     BenchData *b = opaque;
4446     BlockAIOCB *acb;
4447 
4448     if (ret < 0) {
4449         error_report("Failed request: %s", strerror(-ret));
4450         exit(EXIT_FAILURE);
4451     }
4452 
4453     if (b->in_flush) {
4454         /* Just finished a flush with drained queue: Start next requests */
4455         assert(b->in_flight == 0);
4456         b->in_flush = false;
4457     } else if (b->in_flight > 0) {
4458         int remaining = b->n - b->in_flight;
4459 
4460         b->n--;
4461         b->in_flight--;
4462 
4463         /* Time for flush? Drain queue if requested, then flush */
4464         if (b->flush_interval && remaining % b->flush_interval == 0) {
4465             if (!b->in_flight || !b->drain_on_flush) {
4466                 BlockCompletionFunc *cb;
4467 
4468                 if (b->drain_on_flush) {
4469                     b->in_flush = true;
4470                     cb = bench_cb;
4471                 } else {
4472                     cb = bench_undrained_flush_cb;
4473                 }
4474 
4475                 acb = blk_aio_flush(b->blk, cb, b);
4476                 if (!acb) {
4477                     error_report("Failed to issue flush request");
4478                     exit(EXIT_FAILURE);
4479                 }
4480             }
4481             if (b->drain_on_flush) {
4482                 return;
4483             }
4484         }
4485     }
4486 
4487     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4488         int64_t offset = b->offset;
4489         /* blk_aio_* might look for completed I/Os and kick bench_cb
4490          * again, so make sure this operation is counted by in_flight
4491          * and b->offset is ready for the next submission.
4492          */
4493         b->in_flight++;
4494         b->offset += b->step;
4495         b->offset %= b->image_size;
4496         if (b->write) {
4497             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4498         } else {
4499             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4500         }
4501         if (!acb) {
4502             error_report("Failed to issue request");
4503             exit(EXIT_FAILURE);
4504         }
4505     }
4506 }
4507 
4508 static int img_bench(int argc, char **argv)
4509 {
4510     int c, ret = 0;
4511     const char *fmt = NULL, *filename;
4512     bool quiet = false;
4513     bool image_opts = false;
4514     bool is_write = false;
4515     int count = 75000;
4516     int depth = 64;
4517     int64_t offset = 0;
4518     size_t bufsize = 4096;
4519     int pattern = 0;
4520     size_t step = 0;
4521     int flush_interval = 0;
4522     bool drain_on_flush = true;
4523     int64_t image_size;
4524     BlockBackend *blk = NULL;
4525     BenchData data = {};
4526     int flags = 0;
4527     bool writethrough = false;
4528     struct timeval t1, t2;
4529     int i;
4530     bool force_share = false;
4531     size_t buf_size = 0;
4532 
4533     for (;;) {
4534         static const struct option long_options[] = {
4535             {"help", no_argument, 0, 'h'},
4536             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4537             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4538             {"pattern", required_argument, 0, OPTION_PATTERN},
4539             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4540             {"force-share", no_argument, 0, 'U'},
4541             {0, 0, 0, 0}
4542         };
4543         c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
4544                         NULL);
4545         if (c == -1) {
4546             break;
4547         }
4548 
4549         switch (c) {
4550         case ':':
4551             missing_argument(argv[optind - 1]);
4552             break;
4553         case '?':
4554             unrecognized_option(argv[optind - 1]);
4555             break;
4556         case 'h':
4557             help();
4558             break;
4559         case 'c':
4560         {
4561             unsigned long res;
4562 
4563             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4564                 error_report("Invalid request count specified");
4565                 return 1;
4566             }
4567             count = res;
4568             break;
4569         }
4570         case 'd':
4571         {
4572             unsigned long res;
4573 
4574             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4575                 error_report("Invalid queue depth specified");
4576                 return 1;
4577             }
4578             depth = res;
4579             break;
4580         }
4581         case 'f':
4582             fmt = optarg;
4583             break;
4584         case 'n':
4585             flags |= BDRV_O_NATIVE_AIO;
4586             break;
4587         case 'i':
4588             ret = bdrv_parse_aio(optarg, &flags);
4589             if (ret < 0) {
4590                 error_report("Invalid aio option: %s", optarg);
4591                 ret = -1;
4592                 goto out;
4593             }
4594             break;
4595         case 'o':
4596         {
4597             offset = cvtnum("offset", optarg);
4598             if (offset < 0) {
4599                 return 1;
4600             }
4601             break;
4602         }
4603             break;
4604         case 'q':
4605             quiet = true;
4606             break;
4607         case 's':
4608         {
4609             int64_t sval;
4610 
4611             sval = cvtnum_full("buffer size", optarg, 0, INT_MAX);
4612             if (sval < 0) {
4613                 return 1;
4614             }
4615 
4616             bufsize = sval;
4617             break;
4618         }
4619         case 'S':
4620         {
4621             int64_t sval;
4622 
4623             sval = cvtnum_full("step_size", optarg, 0, INT_MAX);
4624             if (sval < 0) {
4625                 return 1;
4626             }
4627 
4628             step = sval;
4629             break;
4630         }
4631         case 't':
4632             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4633             if (ret < 0) {
4634                 error_report("Invalid cache mode");
4635                 ret = -1;
4636                 goto out;
4637             }
4638             break;
4639         case 'w':
4640             flags |= BDRV_O_RDWR;
4641             is_write = true;
4642             break;
4643         case 'U':
4644             force_share = true;
4645             break;
4646         case OPTION_PATTERN:
4647         {
4648             unsigned long res;
4649 
4650             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > 0xff) {
4651                 error_report("Invalid pattern byte specified");
4652                 return 1;
4653             }
4654             pattern = res;
4655             break;
4656         }
4657         case OPTION_FLUSH_INTERVAL:
4658         {
4659             unsigned long res;
4660 
4661             if (qemu_strtoul(optarg, NULL, 0, &res) < 0 || res > INT_MAX) {
4662                 error_report("Invalid flush interval specified");
4663                 return 1;
4664             }
4665             flush_interval = res;
4666             break;
4667         }
4668         case OPTION_NO_DRAIN:
4669             drain_on_flush = false;
4670             break;
4671         case OPTION_IMAGE_OPTS:
4672             image_opts = true;
4673             break;
4674         }
4675     }
4676 
4677     if (optind != argc - 1) {
4678         error_exit("Expecting one image file name");
4679     }
4680     filename = argv[argc - 1];
4681 
4682     if (!is_write && flush_interval) {
4683         error_report("--flush-interval is only available in write tests");
4684         ret = -1;
4685         goto out;
4686     }
4687     if (flush_interval && flush_interval < depth) {
4688         error_report("Flush interval can't be smaller than depth");
4689         ret = -1;
4690         goto out;
4691     }
4692 
4693     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4694                    force_share);
4695     if (!blk) {
4696         ret = -1;
4697         goto out;
4698     }
4699 
4700     image_size = blk_getlength(blk);
4701     if (image_size < 0) {
4702         ret = image_size;
4703         goto out;
4704     }
4705 
4706     data = (BenchData) {
4707         .blk            = blk,
4708         .image_size     = image_size,
4709         .bufsize        = bufsize,
4710         .step           = step ?: bufsize,
4711         .nrreq          = depth,
4712         .n              = count,
4713         .offset         = offset,
4714         .write          = is_write,
4715         .flush_interval = flush_interval,
4716         .drain_on_flush = drain_on_flush,
4717     };
4718     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4719            "(starting at offset %" PRId64 ", step size %d)\n",
4720            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4721            data.offset, data.step);
4722     if (flush_interval) {
4723         printf("Sending flush every %d requests\n", flush_interval);
4724     }
4725 
4726     buf_size = data.nrreq * data.bufsize;
4727     data.buf = blk_blockalign(blk, buf_size);
4728     memset(data.buf, pattern, data.nrreq * data.bufsize);
4729 
4730     blk_register_buf(blk, data.buf, buf_size, &error_fatal);
4731 
4732     data.qiov = g_new(QEMUIOVector, data.nrreq);
4733     for (i = 0; i < data.nrreq; i++) {
4734         qemu_iovec_init(&data.qiov[i], 1);
4735         qemu_iovec_add(&data.qiov[i],
4736                        data.buf + i * data.bufsize, data.bufsize);
4737     }
4738 
4739     gettimeofday(&t1, NULL);
4740     bench_cb(&data, 0);
4741 
4742     while (data.n > 0) {
4743         main_loop_wait(false);
4744     }
4745     gettimeofday(&t2, NULL);
4746 
4747     printf("Run completed in %3.3f seconds.\n",
4748            (t2.tv_sec - t1.tv_sec)
4749            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4750 
4751 out:
4752     if (data.buf) {
4753         blk_unregister_buf(blk, data.buf, buf_size);
4754     }
4755     qemu_vfree(data.buf);
4756     blk_unref(blk);
4757 
4758     if (ret) {
4759         return 1;
4760     }
4761     return 0;
4762 }
4763 
4764 enum ImgBitmapAct {
4765     BITMAP_ADD,
4766     BITMAP_REMOVE,
4767     BITMAP_CLEAR,
4768     BITMAP_ENABLE,
4769     BITMAP_DISABLE,
4770     BITMAP_MERGE,
4771 };
4772 typedef struct ImgBitmapAction {
4773     enum ImgBitmapAct act;
4774     const char *src; /* only used for merge */
4775     QSIMPLEQ_ENTRY(ImgBitmapAction) next;
4776 } ImgBitmapAction;
4777 
4778 static int img_bitmap(int argc, char **argv)
4779 {
4780     Error *err = NULL;
4781     int c, ret = 1;
4782     QemuOpts *opts = NULL;
4783     const char *fmt = NULL, *src_fmt = NULL, *src_filename = NULL;
4784     const char *filename, *bitmap;
4785     BlockBackend *blk = NULL, *src = NULL;
4786     BlockDriverState *bs = NULL, *src_bs = NULL;
4787     bool image_opts = false;
4788     int64_t granularity = 0;
4789     bool add = false, merge = false;
4790     QSIMPLEQ_HEAD(, ImgBitmapAction) actions;
4791     ImgBitmapAction *act, *act_next;
4792     const char *op;
4793     int inactivate_ret;
4794 
4795     QSIMPLEQ_INIT(&actions);
4796 
4797     for (;;) {
4798         static const struct option long_options[] = {
4799             {"help", no_argument, 0, 'h'},
4800             {"object", required_argument, 0, OPTION_OBJECT},
4801             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4802             {"add", no_argument, 0, OPTION_ADD},
4803             {"remove", no_argument, 0, OPTION_REMOVE},
4804             {"clear", no_argument, 0, OPTION_CLEAR},
4805             {"enable", no_argument, 0, OPTION_ENABLE},
4806             {"disable", no_argument, 0, OPTION_DISABLE},
4807             {"merge", required_argument, 0, OPTION_MERGE},
4808             {"granularity", required_argument, 0, 'g'},
4809             {"source-file", required_argument, 0, 'b'},
4810             {"source-format", required_argument, 0, 'F'},
4811             {0, 0, 0, 0}
4812         };
4813         c = getopt_long(argc, argv, ":b:f:F:g:h", long_options, NULL);
4814         if (c == -1) {
4815             break;
4816         }
4817 
4818         switch (c) {
4819         case ':':
4820             missing_argument(argv[optind - 1]);
4821             break;
4822         case '?':
4823             unrecognized_option(argv[optind - 1]);
4824             break;
4825         case 'h':
4826             help();
4827             break;
4828         case 'b':
4829             src_filename = optarg;
4830             break;
4831         case 'f':
4832             fmt = optarg;
4833             break;
4834         case 'F':
4835             src_fmt = optarg;
4836             break;
4837         case 'g':
4838             granularity = cvtnum("granularity", optarg);
4839             if (granularity < 0) {
4840                 return 1;
4841             }
4842             break;
4843         case OPTION_ADD:
4844             act = g_new0(ImgBitmapAction, 1);
4845             act->act = BITMAP_ADD;
4846             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4847             add = true;
4848             break;
4849         case OPTION_REMOVE:
4850             act = g_new0(ImgBitmapAction, 1);
4851             act->act = BITMAP_REMOVE;
4852             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4853             break;
4854         case OPTION_CLEAR:
4855             act = g_new0(ImgBitmapAction, 1);
4856             act->act = BITMAP_CLEAR;
4857             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4858             break;
4859         case OPTION_ENABLE:
4860             act = g_new0(ImgBitmapAction, 1);
4861             act->act = BITMAP_ENABLE;
4862             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4863             break;
4864         case OPTION_DISABLE:
4865             act = g_new0(ImgBitmapAction, 1);
4866             act->act = BITMAP_DISABLE;
4867             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4868             break;
4869         case OPTION_MERGE:
4870             act = g_new0(ImgBitmapAction, 1);
4871             act->act = BITMAP_MERGE;
4872             act->src = optarg;
4873             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
4874             merge = true;
4875             break;
4876         case OPTION_OBJECT:
4877             user_creatable_process_cmdline(optarg);
4878             break;
4879         case OPTION_IMAGE_OPTS:
4880             image_opts = true;
4881             break;
4882         }
4883     }
4884 
4885     if (QSIMPLEQ_EMPTY(&actions)) {
4886         error_report("Need at least one of --add, --remove, --clear, "
4887                      "--enable, --disable, or --merge");
4888         goto out;
4889     }
4890 
4891     if (granularity && !add) {
4892         error_report("granularity only supported with --add");
4893         goto out;
4894     }
4895     if (src_fmt && !src_filename) {
4896         error_report("-F only supported with -b");
4897         goto out;
4898     }
4899     if (src_filename && !merge) {
4900         error_report("Merge bitmap source file only supported with "
4901                      "--merge");
4902         goto out;
4903     }
4904 
4905     if (optind != argc - 2) {
4906         error_report("Expecting filename and bitmap name");
4907         goto out;
4908     }
4909 
4910     filename = argv[optind];
4911     bitmap = argv[optind + 1];
4912 
4913     /*
4914      * No need to open backing chains; we will be manipulating bitmaps
4915      * directly in this image without reference to image contents.
4916      */
4917     blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR | BDRV_O_NO_BACKING,
4918                    false, false, false);
4919     if (!blk) {
4920         goto out;
4921     }
4922     bs = blk_bs(blk);
4923     if (src_filename) {
4924         src = img_open(false, src_filename, src_fmt, BDRV_O_NO_BACKING,
4925                        false, false, false);
4926         if (!src) {
4927             goto out;
4928         }
4929         src_bs = blk_bs(src);
4930     } else {
4931         src_bs = bs;
4932     }
4933 
4934     QSIMPLEQ_FOREACH_SAFE(act, &actions, next, act_next) {
4935         switch (act->act) {
4936         case BITMAP_ADD:
4937             qmp_block_dirty_bitmap_add(bs->node_name, bitmap,
4938                                        !!granularity, granularity, true, true,
4939                                        false, false, &err);
4940             op = "add";
4941             break;
4942         case BITMAP_REMOVE:
4943             qmp_block_dirty_bitmap_remove(bs->node_name, bitmap, &err);
4944             op = "remove";
4945             break;
4946         case BITMAP_CLEAR:
4947             qmp_block_dirty_bitmap_clear(bs->node_name, bitmap, &err);
4948             op = "clear";
4949             break;
4950         case BITMAP_ENABLE:
4951             qmp_block_dirty_bitmap_enable(bs->node_name, bitmap, &err);
4952             op = "enable";
4953             break;
4954         case BITMAP_DISABLE:
4955             qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err);
4956             op = "disable";
4957             break;
4958         case BITMAP_MERGE:
4959             do_dirty_bitmap_merge(bs->node_name, bitmap, src_bs->node_name,
4960                                   act->src, &err);
4961             op = "merge";
4962             break;
4963         default:
4964             g_assert_not_reached();
4965         }
4966 
4967         if (err) {
4968             error_reportf_err(err, "Operation %s on bitmap %s failed: ",
4969                               op, bitmap);
4970             goto out;
4971         }
4972         g_free(act);
4973     }
4974 
4975     ret = 0;
4976 
4977  out:
4978     /*
4979      * Manually inactivate the images first because this way we can know whether
4980      * an error occurred. blk_unref() doesn't tell us about failures.
4981      */
4982     inactivate_ret = bdrv_inactivate_all();
4983     if (inactivate_ret < 0) {
4984         error_report("Error while closing the image: %s", strerror(-inactivate_ret));
4985         ret = 1;
4986     }
4987 
4988     blk_unref(src);
4989     blk_unref(blk);
4990     qemu_opts_del(opts);
4991     return ret;
4992 }
4993 
/* Bits recording which "qemu-img dd" operands were given (DdInfo.flags) */
#define C_BS      0x01
#define C_COUNT   0x02
#define C_IF      0x04
#define C_OF      0x08
#define C_SKIP    0x10
4999 
/* State of a "qemu-img dd" run that is not tied to one side of the copy */
struct DdInfo {
    unsigned int flags;     /* C_* bits of the operands seen so far */
    int64_t count;          /* value of the count= operand */
};

/* One side (input or output) of a "qemu-img dd" run */
struct DdIo {
    int bsz;    /* Block size */
    char *filename;
    uint8_t *buf;
    int64_t offset;
};

/* Table entry mapping an operand name to its parser and its C_* flag */
struct DdOpts {
    const char *name;
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    unsigned int flag;
};
5017 
5018 static int img_dd_bs(const char *arg,
5019                      struct DdIo *in, struct DdIo *out,
5020                      struct DdInfo *dd)
5021 {
5022     int64_t res;
5023 
5024     res = cvtnum_full("bs", arg, 1, INT_MAX);
5025 
5026     if (res < 0) {
5027         return 1;
5028     }
5029     in->bsz = out->bsz = res;
5030 
5031     return 0;
5032 }
5033 
5034 static int img_dd_count(const char *arg,
5035                         struct DdIo *in, struct DdIo *out,
5036                         struct DdInfo *dd)
5037 {
5038     dd->count = cvtnum("count", arg);
5039 
5040     if (dd->count < 0) {
5041         return 1;
5042     }
5043 
5044     return 0;
5045 }
5046 
5047 static int img_dd_if(const char *arg,
5048                      struct DdIo *in, struct DdIo *out,
5049                      struct DdInfo *dd)
5050 {
5051     in->filename = g_strdup(arg);
5052 
5053     return 0;
5054 }
5055 
5056 static int img_dd_of(const char *arg,
5057                      struct DdIo *in, struct DdIo *out,
5058                      struct DdInfo *dd)
5059 {
5060     out->filename = g_strdup(arg);
5061 
5062     return 0;
5063 }
5064 
5065 static int img_dd_skip(const char *arg,
5066                        struct DdIo *in, struct DdIo *out,
5067                        struct DdInfo *dd)
5068 {
5069     in->offset = cvtnum("skip", arg);
5070 
5071     if (in->offset < 0) {
5072         return 1;
5073     }
5074 
5075     return 0;
5076 }
5077 
5078 static int img_dd(int argc, char **argv)
5079 {
5080     int ret = 0;
5081     char *arg = NULL;
5082     char *tmp;
5083     BlockDriver *drv = NULL, *proto_drv = NULL;
5084     BlockBackend *blk1 = NULL, *blk2 = NULL;
5085     QemuOpts *opts = NULL;
5086     QemuOptsList *create_opts = NULL;
5087     Error *local_err = NULL;
5088     bool image_opts = false;
5089     int c, i;
5090     const char *out_fmt = "raw";
5091     const char *fmt = NULL;
5092     int64_t size = 0;
5093     int64_t out_pos, in_pos;
5094     bool force_share = false;
5095     struct DdInfo dd = {
5096         .flags = 0,
5097         .count = 0,
5098     };
5099     struct DdIo in = {
5100         .bsz = 512, /* Block size is by default 512 bytes */
5101         .filename = NULL,
5102         .buf = NULL,
5103         .offset = 0
5104     };
5105     struct DdIo out = {
5106         .bsz = 512,
5107         .filename = NULL,
5108         .buf = NULL,
5109         .offset = 0
5110     };
5111 
5112     const struct DdOpts options[] = {
5113         { "bs", img_dd_bs, C_BS },
5114         { "count", img_dd_count, C_COUNT },
5115         { "if", img_dd_if, C_IF },
5116         { "of", img_dd_of, C_OF },
5117         { "skip", img_dd_skip, C_SKIP },
5118         { NULL, NULL, 0 }
5119     };
5120     const struct option long_options[] = {
5121         { "help", no_argument, 0, 'h'},
5122         { "object", required_argument, 0, OPTION_OBJECT},
5123         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5124         { "force-share", no_argument, 0, 'U'},
5125         { 0, 0, 0, 0 }
5126     };
5127 
5128     while ((c = getopt_long(argc, argv, ":hf:O:U", long_options, NULL))) {
5129         if (c == EOF) {
5130             break;
5131         }
5132         switch (c) {
5133         case 'O':
5134             out_fmt = optarg;
5135             break;
5136         case 'f':
5137             fmt = optarg;
5138             break;
5139         case ':':
5140             missing_argument(argv[optind - 1]);
5141             break;
5142         case '?':
5143             unrecognized_option(argv[optind - 1]);
5144             break;
5145         case 'h':
5146             help();
5147             break;
5148         case 'U':
5149             force_share = true;
5150             break;
5151         case OPTION_OBJECT:
5152             user_creatable_process_cmdline(optarg);
5153             break;
5154         case OPTION_IMAGE_OPTS:
5155             image_opts = true;
5156             break;
5157         }
5158     }
5159 
5160     for (i = optind; i < argc; i++) {
5161         int j;
5162         arg = g_strdup(argv[i]);
5163 
5164         tmp = strchr(arg, '=');
5165         if (tmp == NULL) {
5166             error_report("unrecognized operand %s", arg);
5167             ret = -1;
5168             goto out;
5169         }
5170 
5171         *tmp++ = '\0';
5172 
5173         for (j = 0; options[j].name != NULL; j++) {
5174             if (!strcmp(arg, options[j].name)) {
5175                 break;
5176             }
5177         }
5178         if (options[j].name == NULL) {
5179             error_report("unrecognized operand %s", arg);
5180             ret = -1;
5181             goto out;
5182         }
5183 
5184         if (options[j].f(tmp, &in, &out, &dd) != 0) {
5185             ret = -1;
5186             goto out;
5187         }
5188         dd.flags |= options[j].flag;
5189         g_free(arg);
5190         arg = NULL;
5191     }
5192 
5193     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
5194         error_report("Must specify both input and output files");
5195         ret = -1;
5196         goto out;
5197     }
5198 
5199     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
5200                     force_share);
5201 
5202     if (!blk1) {
5203         ret = -1;
5204         goto out;
5205     }
5206 
5207     drv = bdrv_find_format(out_fmt);
5208     if (!drv) {
5209         error_report("Unknown file format");
5210         ret = -1;
5211         goto out;
5212     }
5213     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
5214 
5215     if (!proto_drv) {
5216         error_report_err(local_err);
5217         ret = -1;
5218         goto out;
5219     }
5220     if (!drv->create_opts) {
5221         error_report("Format driver '%s' does not support image creation",
5222                      drv->format_name);
5223         ret = -1;
5224         goto out;
5225     }
5226     if (!proto_drv->create_opts) {
5227         error_report("Protocol driver '%s' does not support image creation",
5228                      proto_drv->format_name);
5229         ret = -1;
5230         goto out;
5231     }
5232     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5233     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
5234 
5235     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5236 
5237     size = blk_getlength(blk1);
5238     if (size < 0) {
5239         error_report("Failed to get size for '%s'", in.filename);
5240         ret = -1;
5241         goto out;
5242     }
5243 
5244     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
5245         dd.count * in.bsz < size) {
5246         size = dd.count * in.bsz;
5247     }
5248 
5249     /* Overflow means the specified offset is beyond input image's size */
5250     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5251                               size < in.bsz * in.offset)) {
5252         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
5253     } else {
5254         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
5255                             size - in.bsz * in.offset, &error_abort);
5256     }
5257 
5258     ret = bdrv_create(drv, out.filename, opts, &local_err);
5259     if (ret < 0) {
5260         error_reportf_err(local_err,
5261                           "%s: error while creating output image: ",
5262                           out.filename);
5263         ret = -1;
5264         goto out;
5265     }
5266 
5267     /* TODO, we can't honour --image-opts for the target,
5268      * since it needs to be given in a format compatible
5269      * with the bdrv_create() call above which does not
5270      * support image-opts style.
5271      */
5272     blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
5273                          false, false, false);
5274 
5275     if (!blk2) {
5276         ret = -1;
5277         goto out;
5278     }
5279 
5280     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5281                               size < in.offset * in.bsz)) {
5282         /* We give a warning if the skip option is bigger than the input
5283          * size and create an empty output disk image (i.e. like dd(1)).
5284          */
5285         error_report("%s: cannot skip to specified offset", in.filename);
5286         in_pos = size;
5287     } else {
5288         in_pos = in.offset * in.bsz;
5289     }
5290 
5291     in.buf = g_new(uint8_t, in.bsz);
5292 
5293     for (out_pos = 0; in_pos < size; ) {
5294         int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
5295 
5296         ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
5297         if (ret < 0) {
5298             error_report("error while reading from input image file: %s",
5299                          strerror(-ret));
5300             goto out;
5301         }
5302         in_pos += bytes;
5303 
5304         ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
5305         if (ret < 0) {
5306             error_report("error while writing to output image file: %s",
5307                          strerror(-ret));
5308             goto out;
5309         }
5310         out_pos += bytes;
5311     }
5312 
5313 out:
5314     g_free(arg);
5315     qemu_opts_del(opts);
5316     qemu_opts_free(create_opts);
5317     blk_unref(blk1);
5318     blk_unref(blk2);
5319     g_free(in.filename);
5320     g_free(out.filename);
5321     g_free(in.buf);
5322     g_free(out.buf);
5323 
5324     if (ret) {
5325         return 1;
5326     }
5327     return 0;
5328 }
5329 
5330 static void dump_json_block_measure_info(BlockMeasureInfo *info)
5331 {
5332     GString *str;
5333     QObject *obj;
5334     Visitor *v = qobject_output_visitor_new(&obj);
5335 
5336     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
5337     visit_complete(v, &obj);
5338     str = qobject_to_json_pretty(obj, true);
5339     assert(str != NULL);
5340     printf("%s\n", str->str);
5341     qobject_unref(obj);
5342     visit_free(v);
5343     g_string_free(str, true);
5344 }
5345 
5346 static int img_measure(int argc, char **argv)
5347 {
5348     static const struct option long_options[] = {
5349         {"help", no_argument, 0, 'h'},
5350         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5351         {"object", required_argument, 0, OPTION_OBJECT},
5352         {"output", required_argument, 0, OPTION_OUTPUT},
5353         {"size", required_argument, 0, OPTION_SIZE},
5354         {"force-share", no_argument, 0, 'U'},
5355         {0, 0, 0, 0}
5356     };
5357     OutputFormat output_format = OFORMAT_HUMAN;
5358     BlockBackend *in_blk = NULL;
5359     BlockDriver *drv;
5360     const char *filename = NULL;
5361     const char *fmt = NULL;
5362     const char *out_fmt = "raw";
5363     char *options = NULL;
5364     char *snapshot_name = NULL;
5365     bool force_share = false;
5366     QemuOpts *opts = NULL;
5367     QemuOpts *object_opts = NULL;
5368     QemuOpts *sn_opts = NULL;
5369     QemuOptsList *create_opts = NULL;
5370     bool image_opts = false;
5371     uint64_t img_size = UINT64_MAX;
5372     BlockMeasureInfo *info = NULL;
5373     Error *local_err = NULL;
5374     int ret = 1;
5375     int c;
5376 
5377     while ((c = getopt_long(argc, argv, "hf:O:o:l:U",
5378                             long_options, NULL)) != -1) {
5379         switch (c) {
5380         case '?':
5381         case 'h':
5382             help();
5383             break;
5384         case 'f':
5385             fmt = optarg;
5386             break;
5387         case 'O':
5388             out_fmt = optarg;
5389             break;
5390         case 'o':
5391             if (accumulate_options(&options, optarg) < 0) {
5392                 goto out;
5393             }
5394             break;
5395         case 'l':
5396             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
5397                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
5398                                                   optarg, false);
5399                 if (!sn_opts) {
5400                     error_report("Failed in parsing snapshot param '%s'",
5401                                  optarg);
5402                     goto out;
5403                 }
5404             } else {
5405                 snapshot_name = optarg;
5406             }
5407             break;
5408         case 'U':
5409             force_share = true;
5410             break;
5411         case OPTION_OBJECT:
5412             user_creatable_process_cmdline(optarg);
5413             break;
5414         case OPTION_IMAGE_OPTS:
5415             image_opts = true;
5416             break;
5417         case OPTION_OUTPUT:
5418             if (!strcmp(optarg, "json")) {
5419                 output_format = OFORMAT_JSON;
5420             } else if (!strcmp(optarg, "human")) {
5421                 output_format = OFORMAT_HUMAN;
5422             } else {
5423                 error_report("--output must be used with human or json "
5424                              "as argument.");
5425                 goto out;
5426             }
5427             break;
5428         case OPTION_SIZE:
5429         {
5430             int64_t sval;
5431 
5432             sval = cvtnum("image size", optarg);
5433             if (sval < 0) {
5434                 goto out;
5435             }
5436             img_size = (uint64_t)sval;
5437         }
5438         break;
5439         }
5440     }
5441 
5442     if (argc - optind > 1) {
5443         error_report("At most one filename argument is allowed.");
5444         goto out;
5445     } else if (argc - optind == 1) {
5446         filename = argv[optind];
5447     }
5448 
5449     if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
5450         error_report("--image-opts, -f, and -l require a filename argument.");
5451         goto out;
5452     }
5453     if (filename && img_size != UINT64_MAX) {
5454         error_report("--size N cannot be used together with a filename.");
5455         goto out;
5456     }
5457     if (!filename && img_size == UINT64_MAX) {
5458         error_report("Either --size N or one filename must be specified.");
5459         goto out;
5460     }
5461 
5462     if (filename) {
5463         in_blk = img_open(image_opts, filename, fmt, 0,
5464                           false, false, force_share);
5465         if (!in_blk) {
5466             goto out;
5467         }
5468 
5469         if (sn_opts) {
5470             bdrv_snapshot_load_tmp(blk_bs(in_blk),
5471                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
5472                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
5473                     &local_err);
5474         } else if (snapshot_name != NULL) {
5475             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
5476                     snapshot_name, &local_err);
5477         }
5478         if (local_err) {
5479             error_reportf_err(local_err, "Failed to load snapshot: ");
5480             goto out;
5481         }
5482     }
5483 
5484     drv = bdrv_find_format(out_fmt);
5485     if (!drv) {
5486         error_report("Unknown file format '%s'", out_fmt);
5487         goto out;
5488     }
5489     if (!drv->create_opts) {
5490         error_report("Format driver '%s' does not support image creation",
5491                      drv->format_name);
5492         goto out;
5493     }
5494 
5495     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5496     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
5497     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5498     if (options) {
5499         if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
5500             error_report_err(local_err);
5501             error_report("Invalid options for file format '%s'", out_fmt);
5502             goto out;
5503         }
5504     }
5505     if (img_size != UINT64_MAX) {
5506         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5507     }
5508 
5509     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
5510     if (local_err) {
5511         error_report_err(local_err);
5512         goto out;
5513     }
5514 
5515     if (output_format == OFORMAT_HUMAN) {
5516         printf("required size: %" PRIu64 "\n", info->required);
5517         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5518         if (info->has_bitmaps) {
5519             printf("bitmaps size: %" PRIu64 "\n", info->bitmaps);
5520         }
5521     } else {
5522         dump_json_block_measure_info(info);
5523     }
5524 
5525     ret = 0;
5526 
5527 out:
5528     qapi_free_BlockMeasureInfo(info);
5529     qemu_opts_del(object_opts);
5530     qemu_opts_del(opts);
5531     qemu_opts_del(sn_opts);
5532     qemu_opts_free(create_opts);
5533     g_free(options);
5534     blk_unref(in_blk);
5535     return ret;
5536 }
5537 
5538 static const img_cmd_t img_cmds[] = {
5539 #define DEF(option, callback, arg_string)        \
5540     { option, callback },
5541 #include "qemu-img-cmds.h"
5542 #undef DEF
5543     { NULL, NULL, },
5544 };
5545 
5546 int main(int argc, char **argv)
5547 {
5548     const img_cmd_t *cmd;
5549     const char *cmdname;
5550     int c;
5551     static const struct option long_options[] = {
5552         {"help", no_argument, 0, 'h'},
5553         {"version", no_argument, 0, 'V'},
5554         {"trace", required_argument, NULL, 'T'},
5555         {0, 0, 0, 0}
5556     };
5557 
5558 #ifdef CONFIG_POSIX
5559     signal(SIGPIPE, SIG_IGN);
5560 #endif
5561 
5562     socket_init();
5563     error_init(argv[0]);
5564     module_call_init(MODULE_INIT_TRACE);
5565     qemu_init_exec_dir(argv[0]);
5566 
5567     qemu_init_main_loop(&error_fatal);
5568 
5569     qcrypto_init(&error_fatal);
5570 
5571     module_call_init(MODULE_INIT_QOM);
5572     bdrv_init();
5573     if (argc < 2) {
5574         error_exit("Not enough arguments");
5575     }
5576 
5577     qemu_add_opts(&qemu_source_opts);
5578     qemu_add_opts(&qemu_trace_opts);
5579 
5580     while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
5581         switch (c) {
5582         case ':':
5583             missing_argument(argv[optind - 1]);
5584             return 0;
5585         case '?':
5586             unrecognized_option(argv[optind - 1]);
5587             return 0;
5588         case 'h':
5589             help();
5590             return 0;
5591         case 'V':
5592             printf(QEMU_IMG_VERSION);
5593             return 0;
5594         case 'T':
5595             trace_opt_parse(optarg);
5596             break;
5597         }
5598     }
5599 
5600     cmdname = argv[optind];
5601 
5602     /* reset getopt_long scanning */
5603     argc -= optind;
5604     if (argc < 1) {
5605         return 0;
5606     }
5607     argv += optind;
5608     qemu_reset_optind();
5609 
5610     if (!trace_init_backends()) {
5611         exit(1);
5612     }
5613     trace_init_file();
5614     qemu_set_log(LOG_TRACE, &error_fatal);
5615 
5616     /* find the command */
5617     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5618         if (!strcmp(cmdname, cmd->name)) {
5619             return cmd->handler(argc, argv);
5620         }
5621     }
5622 
5623     /* not found */
5624     error_exit("Command not found: %s", cmdname);
5625 }
5626