xref: /openbmc/qemu/qemu-img.c (revision 9febfa94b69b7146582c48a868bd2330ac45037f)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu/help-texts.h"
29 #include "qemu/qemu-progress.h"
30 #include "qemu-version.h"
31 #include "qapi/error.h"
32 #include "qapi/qapi-commands-block-core.h"
33 #include "qapi/qapi-visit-block-core.h"
34 #include "qapi/qobject-output-visitor.h"
35 #include "qobject/qjson.h"
36 #include "qobject/qdict.h"
37 #include "qemu/cutils.h"
38 #include "qemu/config-file.h"
39 #include "qemu/option.h"
40 #include "qemu/error-report.h"
41 #include "qemu/log.h"
42 #include "qemu/main-loop.h"
43 #include "qemu/module.h"
44 #include "qemu/sockets.h"
45 #include "qemu/units.h"
46 #include "qemu/memalign.h"
47 #include "qom/object_interfaces.h"
48 #include "system/block-backend.h"
49 #include "block/block_int.h"
50 #include "block/blockjob.h"
51 #include "block/dirty-bitmap.h"
52 #include "block/qapi.h"
53 #include "crypto/init.h"
54 #include "trace/control.h"
55 #include "qemu/throttle.h"
56 #include "block/throttle-groups.h"
57 
/*
 * Version banner: "qemu-img version " followed by QEMU_FULL_VERSION,
 * a newline, QEMU_COPYRIGHT and a final newline.
 */
#define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
                          "\n" QEMU_COPYRIGHT "\n"
60 
/*
 * Descriptor of one qemu-img subcommand.
 */
typedef struct img_cmd_t {
    /* Subcommand name; cmd_help() prints it right after "qemu-img" */
    const char *name;
    /* Entry point; receives its own descriptor plus an argc/argv pair */
    int (*handler)(const struct img_cmd_t *ccmd, int argc, char **argv);
    /* Summary text; cmd_help() prints it followed by a '.' */
    const char *description;
} img_cmd_t;
66 
/*
 * Values returned by getopt_long() for long options that the subcommands
 * declare without a short-option letter (for example "--object",
 * "--image-opts" and "--output").  All values are >= 256.
 * NOTE: the value 264 is not assigned.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN = 257,
    OPTION_OBJECT = 258,
    OPTION_IMAGE_OPTS = 259,
    OPTION_PATTERN = 260,
    OPTION_FLUSH_INTERVAL = 261,
    OPTION_NO_DRAIN = 262,
    OPTION_TARGET_IMAGE_OPTS = 263,
    OPTION_PREALLOCATION = 265,
    OPTION_SHRINK = 266,
    OPTION_SALVAGE = 267,
    OPTION_TARGET_IS_ZERO = 268,
    OPTION_ADD = 269,
    OPTION_REMOVE = 270,
    OPTION_CLEAR = 271,
    OPTION_ENABLE = 272,
    OPTION_DISABLE = 273,
    OPTION_MERGE = 274,
    OPTION_BITMAPS = 275,
    OPTION_FORCE = 276,
    OPTION_SKIP_BROKEN = 277,
    OPTION_LIMITS = 278,
};
91 
/*
 * Output styles selectable with --output; parse_output_format() maps the
 * strings "json" and "human" onto these values.
 */
typedef enum OutputFormat {
    OFORMAT_JSON,
    OFORMAT_HUMAN,
} OutputFormat;
96 
/*
 * Default to cache=writeback as data integrity is not important for qemu-img.
 * Subcommands such as "check" and "commit" start with this cache mode and
 * interpolate the string into their --help text.
 */
#define BDRV_DEFAULT_CACHE "writeback"
99 
100 static G_NORETURN
101 void tryhelp(const char *argv0)
102 {
103     error_printf("Try '%s --help' for more information\n", argv0);
104     exit(EXIT_FAILURE);
105 }
106 
107 static G_NORETURN G_GNUC_PRINTF(2, 3)
108 void error_exit(const char *argv0, const char *fmt, ...)
109 {
110     va_list ap;
111 
112     va_start(ap, fmt);
113     error_vreport(fmt, ap);
114     va_end(ap);
115 
116     tryhelp(argv0);
117 }
118 
119 /*
120  * Print --help output for a command and exit.
121  * @syntax and @description are multi-line with trailing EOL
122  * (to allow easy extending of the text)
123  * @syntax has each subsequent line indented by 8 chars.
124  * @description is indented by 2 chars for argument on each own line,
125  * and with 5 chars for argument description (like -h arg below).
126  */
127 static G_NORETURN
128 void cmd_help(const img_cmd_t *ccmd,
129               const char *syntax, const char *arguments)
130 {
131     printf(
132 "Usage:\n"
133 "  %s %s %s\n"
134 "%s.\n"
135 "\n"
136 "Arguments:\n"
137 "  -h, --help\n"
138 "     print this help and exit\n"
139 "%s\n",
140            "qemu-img", ccmd->name, syntax, ccmd->description, arguments);
141     exit(EXIT_SUCCESS);
142 }
143 
144 static OutputFormat parse_output_format(const char *argv0, const char *arg)
145 {
146     if (!strcmp(arg, "json")) {
147         return OFORMAT_JSON;
148     } else if (!strcmp(arg, "human")) {
149         return OFORMAT_HUMAN;
150     } else {
151         error_exit(argv0, "--output expects 'human' or 'json', not '%s'", arg);
152     }
153 }
154 
/*
 * Can @list be handed to accumulate_options()?
 *
 * accumulate_options() glues several lists together with a ',' between
 * them, so each individual list has to survive that:
 *  - it must not be empty (otherwise the separator in front of it could
 *    escape the separator behind it),
 *  - it must not begin with ',' (otherwise the separator in front of it
 *    would get escaped),
 *  - it must not end in an odd number of ',' (otherwise the separator
 *    behind it would get escaped).
 *
 * Returns true when all three conditions hold.
 */
static bool is_valid_option_list(const char *list)
{
    const char *end;
    size_t trailing_commas = 0;

    if (list[0] == '\0' || list[0] == ',') {
        return false;
    }

    /* Walk backwards over the run of ',' that terminates the string. */
    end = list + strlen(list);
    while (end > list && end[-1] == ',') {
        trailing_commas++;
        end--;
    }

    return trailing_commas % 2 == 0;
}
182 
/*
 * Append the option list @list to the accumulated string *@options,
 * separating the two with a ','.
 *
 * *@options may be NULL (nothing accumulated yet); in that case it
 * becomes a copy of @list.  The previous string, if any, is released
 * with g_free() and *@options is replaced by a newly allocated one.
 *
 * Returns 0 on success.  If @list fails is_valid_option_list(), an error
 * is reported, *@options is left untouched and -1 is returned.
 */
static int accumulate_options(char **options, char *list)
{
    char *joined;

    if (!is_valid_option_list(list)) {
        error_report("Invalid option list: %s", list);
        return -1;
    }

    joined = *options ? g_strdup_printf("%s,%s", *options, list)
                      : g_strdup(list);

    /* g_free(NULL) is a no-op, so the first-call case needs no guard. */
    g_free(*options);
    *options = joined;

    return 0;
}
201 
/*
 * Option list named "source" with an empty descriptor table.
 * img_open() looks up a list of that name with qemu_find_opts("source")
 * to parse --image-opts strings; presumably this is that list (its
 * registration is not part of this excerpt).
 */
static QemuOptsList qemu_source_opts = {
    .name = "source",
    /* NOTE(review): presumably the key used for a leading bare value */
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
    .desc = {
        { }
    },
};
210 
211 static int G_GNUC_PRINTF(2, 3) qprintf(bool quiet, const char *fmt, ...)
212 {
213     int ret = 0;
214     if (!quiet) {
215         va_list args;
216         va_start(args, fmt);
217         ret = vprintf(fmt, args);
218         va_end(args);
219     }
220     return ret;
221 }
222 
223 
224 static int print_block_option_help(const char *filename, const char *fmt)
225 {
226     BlockDriver *drv, *proto_drv;
227     QemuOptsList *create_opts = NULL;
228     Error *local_err = NULL;
229 
230     /* Find driver and parse its options */
231     drv = bdrv_find_format(fmt);
232     if (!drv) {
233         error_report("Unknown file format '%s'", fmt);
234         return 1;
235     }
236 
237     if (!drv->create_opts) {
238         error_report("Format driver '%s' does not support image creation", fmt);
239         return 1;
240     }
241 
242     create_opts = qemu_opts_append(create_opts, drv->create_opts);
243     if (filename) {
244         proto_drv = bdrv_find_protocol(filename, true, &local_err);
245         if (!proto_drv) {
246             error_report_err(local_err);
247             qemu_opts_free(create_opts);
248             return 1;
249         }
250         if (!proto_drv->create_opts) {
251             error_report("Protocol driver '%s' does not support image creation",
252                          proto_drv->format_name);
253             qemu_opts_free(create_opts);
254             return 1;
255         }
256         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
257     }
258 
259     if (filename) {
260         printf("Supported options:\n");
261     } else {
262         printf("Supported %s options:\n", fmt);
263     }
264     qemu_opts_print_help(create_opts, false);
265     qemu_opts_free(create_opts);
266 
267     if (!filename) {
268         printf("\n"
269                "The protocol level may support further options.\n"
270                "Specify the target filename to include those options.\n");
271     }
272 
273     return 0;
274 }
275 
276 
277 static BlockBackend *img_open_opts(const char *optstr,
278                                    QemuOpts *opts, int flags, bool writethrough,
279                                    bool quiet, bool force_share)
280 {
281     QDict *options;
282     Error *local_err = NULL;
283     BlockBackend *blk;
284     options = qemu_opts_to_qdict(opts, NULL);
285     if (force_share) {
286         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
287             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
288             error_report("--force-share/-U conflicts with image options");
289             qobject_unref(options);
290             return NULL;
291         }
292         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
293     }
294     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
295     if (!blk) {
296         error_reportf_err(local_err, "Could not open '%s': ", optstr);
297         return NULL;
298     }
299     blk_set_enable_write_cache(blk, !writethrough);
300 
301     return blk;
302 }
303 
304 static BlockBackend *img_open_file(const char *filename,
305                                    QDict *options,
306                                    const char *fmt, int flags,
307                                    bool writethrough, bool quiet,
308                                    bool force_share)
309 {
310     BlockBackend *blk;
311     Error *local_err = NULL;
312 
313     if (!options) {
314         options = qdict_new();
315     }
316     if (fmt) {
317         qdict_put_str(options, "driver", fmt);
318     }
319 
320     if (force_share) {
321         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
322     }
323     blk = blk_new_open(filename, NULL, options, flags, &local_err);
324     if (!blk) {
325         error_reportf_err(local_err, "Could not open '%s': ", filename);
326         return NULL;
327     }
328     blk_set_enable_write_cache(blk, !writethrough);
329 
330     return blk;
331 }
332 
333 
334 static int img_add_key_secrets(void *opaque,
335                                const char *name, const char *value,
336                                Error **errp)
337 {
338     QDict *options = opaque;
339 
340     if (g_str_has_suffix(name, "key-secret")) {
341         qdict_put_str(options, name, value);
342     }
343 
344     return 0;
345 }
346 
347 
348 static BlockBackend *img_open(bool image_opts,
349                               const char *filename,
350                               const char *fmt, int flags, bool writethrough,
351                               bool quiet, bool force_share)
352 {
353     BlockBackend *blk;
354     if (image_opts) {
355         QemuOpts *opts;
356         if (fmt) {
357             error_report("--image-opts and --format are mutually exclusive");
358             return NULL;
359         }
360         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
361                                        filename, true);
362         if (!opts) {
363             return NULL;
364         }
365         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
366                             force_share);
367     } else {
368         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
369                             force_share);
370     }
371 
372     if (blk) {
373         blk_set_force_allow_inactivate(blk);
374     }
375 
376     return blk;
377 }
378 
379 
380 static int add_old_style_options(const char *fmt, QemuOpts *opts,
381                                  const char *base_filename,
382                                  const char *base_fmt)
383 {
384     if (base_filename) {
385         if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
386                           NULL)) {
387             error_report("Backing file not supported for file format '%s'",
388                          fmt);
389             return -1;
390         }
391     }
392     if (base_fmt) {
393         if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
394             error_report("Backing file format not supported for file "
395                          "format '%s'", fmt);
396             return -1;
397         }
398     }
399     return 0;
400 }
401 
/*
 * Convert the string @value into a number and range-check it.
 *
 * With @is_size the string is parsed by qemu_strtosz(), otherwise by
 * qemu_strtou64() with base 0.  @name describes the value in error
 * messages.  The result must lie within [@min, @max].
 *
 * Returns the parsed number on success.  On failure an error is
 * reported and a negative value is returned: -ERANGE when the parser
 * signalled overflow or the value is outside [@min, @max], otherwise
 * the parser's own negative error code.
 */
static int64_t cvtnum_full(const char *name, const char *value,
                           bool is_size, int64_t min, int64_t max)
{
    uint64_t parsed;
    int rc;

    if (is_size) {
        rc = qemu_strtosz(value, NULL, &parsed);
    } else {
        rc = qemu_strtou64(value, NULL, 0, &parsed);
    }

    /* Anything but an overflow is a plain syntax problem */
    if (rc < 0 && rc != -ERANGE) {
        error_report("Invalid %s specified: '%s'", name, value);
        return rc;
    }

    if (rc == -ERANGE || parsed > max || parsed < min) {
        error_report("Invalid %s specified. Must be between %" PRId64
                     " and %" PRId64 ".", name, min, max);
        return -ERANGE;
    }

    return parsed;
}
421 
/*
 * Shorthand for cvtnum_full() with the accepted range [0, INT64_MAX].
 * See cvtnum_full() for the meaning of the parameters and the return
 * value.
 */
static int64_t cvtnum(const char *name, const char *value, bool is_size)
{
    const int64_t lowest = 0;
    const int64_t highest = INT64_MAX;

    return cvtnum_full(name, value, is_size, lowest, highest);
}
426 
427 static int img_create(const img_cmd_t *ccmd, int argc, char **argv)
428 {
429     int c;
430     int64_t img_size = -1;
431     const char *fmt = "raw";
432     const char *base_fmt = NULL;
433     const char *filename;
434     const char *base_filename = NULL;
435     char *options = NULL;
436     Error *local_err = NULL;
437     bool quiet = false;
438     int flags = 0;
439 
440     for(;;) {
441         static const struct option long_options[] = {
442             {"help", no_argument, 0, 'h'},
443             {"format", required_argument, 0, 'f'},
444             {"options", required_argument, 0, 'o'},
445             {"backing", required_argument, 0, 'b'},
446             {"backing-format", required_argument, 0, 'B'}, /* was -F in 10.0 */
447             {"backing-unsafe", no_argument, 0, 'u'},
448             {"quiet", no_argument, 0, 'q'},
449             {"object", required_argument, 0, OPTION_OBJECT},
450             {0, 0, 0, 0}
451         };
452         c = getopt_long(argc, argv, "hf:o:b:F:B:uq",
453                         long_options, NULL);
454         if (c == -1) {
455             break;
456         }
457         switch(c) {
458         case 'h':
459             cmd_help(ccmd, "[-f FMT] [-o FMT_OPTS]\n"
460 "        [-b BACKING_FILE [-B BACKING_FMT]] [-u]\n"
461 "        [-q] [--object OBJDEF] FILE [SIZE]\n"
462 ,
463 "  -f, --format FMT\n"
464 "     specifies the format of the new image (default: raw)\n"
465 "  -o, --options FMT_OPTS\n"
466 "     format-specific options (specify '-o help' for help)\n"
467 "  -b, --backing BACKING_FILE\n"
468 "     create target image to be a CoW on top of BACKING_FILE\n"
469 "  -B, --backing-format BACKING_FMT (was -F in <= 10.0)\n"
470 "     specifies the format of BACKING_FILE (default: probing is used)\n"
471 "  -u, --backing-unsafe\n"
472 "     do not fail if BACKING_FILE can not be read\n"
473 "  -q, --quiet\n"
474 "     quiet mode (produce only error messages if any)\n"
475 "  --object OBJDEF\n"
476 "     defines QEMU user-creatable object\n"
477 "  FILE\n"
478 "     name of the image file to create (will be overritten if already exists)\n"
479 "  SIZE[bKMGTPE]\n"
480 "     image size with optional multiplier suffix (powers of 1024)\n"
481 "     (required unless BACKING_FILE is specified)\n"
482 );
483             break;
484         case 'f':
485             fmt = optarg;
486             break;
487         case 'o':
488             if (accumulate_options(&options, optarg) < 0) {
489                 goto fail;
490             }
491             break;
492         case 'b':
493             base_filename = optarg;
494             break;
495         case 'F': /* <=10.0 */
496         case 'B':
497             base_fmt = optarg;
498             break;
499         case 'u':
500             flags |= BDRV_O_NO_BACKING;
501             break;
502         case 'q':
503             quiet = true;
504             break;
505         case OPTION_OBJECT:
506             user_creatable_process_cmdline(optarg);
507             break;
508         default:
509             tryhelp(argv[0]);
510         }
511     }
512 
513     /* Get the filename */
514     filename = (optind < argc) ? argv[optind] : NULL;
515     if (options && has_help_option(options)) {
516         g_free(options);
517         return print_block_option_help(filename, fmt);
518     }
519 
520     if (optind >= argc) {
521         error_exit(argv[0], "Expecting image file name");
522     }
523     optind++;
524 
525     /* Get image size, if specified */
526     if (optind < argc) {
527         img_size = cvtnum("image size", argv[optind++], true);
528         if (img_size < 0) {
529             goto fail;
530         }
531     }
532     if (optind != argc) {
533         error_exit(argv[0], "Unexpected argument: %s", argv[optind]);
534     }
535 
536     bdrv_img_create(filename, fmt, base_filename, base_fmt,
537                     options, img_size, flags, quiet, &local_err);
538     if (local_err) {
539         error_reportf_err(local_err, "%s: ", filename);
540         goto fail;
541     }
542 
543     g_free(options);
544     return 0;
545 
546 fail:
547     g_free(options);
548     return 1;
549 }
550 
551 static void dump_json_image_check(ImageCheck *check, bool quiet)
552 {
553     GString *str;
554     QObject *obj;
555     Visitor *v = qobject_output_visitor_new(&obj);
556 
557     visit_type_ImageCheck(v, NULL, &check, &error_abort);
558     visit_complete(v, &obj);
559     str = qobject_to_json_pretty(obj, true);
560     assert(str != NULL);
561     qprintf(quiet, "%s\n", str->str);
562     qobject_unref(obj);
563     visit_free(v);
564     g_string_free(str, true);
565 }
566 
567 static void dump_human_image_check(ImageCheck *check, bool quiet)
568 {
569     if (!(check->corruptions || check->leaks || check->check_errors)) {
570         qprintf(quiet, "No errors were found on the image.\n");
571     } else {
572         if (check->corruptions) {
573             qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
574                     "Data may be corrupted, or further writes to the image "
575                     "may corrupt it.\n",
576                     check->corruptions);
577         }
578 
579         if (check->leaks) {
580             qprintf(quiet,
581                     "\n%" PRId64 " leaked clusters were found on the image.\n"
582                     "This means waste of disk space, but no harm to data.\n",
583                     check->leaks);
584         }
585 
586         if (check->check_errors) {
587             qprintf(quiet,
588                     "\n%" PRId64
589                     " internal errors have occurred during the check.\n",
590                     check->check_errors);
591         }
592     }
593 
594     if (check->total_clusters != 0 && check->allocated_clusters != 0) {
595         qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
596                 "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
597                 check->allocated_clusters, check->total_clusters,
598                 check->allocated_clusters * 100.0 / check->total_clusters,
599                 check->fragmented_clusters * 100.0 / check->allocated_clusters,
600                 check->compressed_clusters * 100.0 /
601                 check->allocated_clusters);
602     }
603 
604     if (check->image_end_offset) {
605         qprintf(quiet,
606                 "Image end offset: %" PRId64 "\n", check->image_end_offset);
607     }
608 }
609 
610 static int collect_image_check(BlockDriverState *bs,
611                    ImageCheck *check,
612                    const char *filename,
613                    const char *fmt,
614                    int fix)
615 {
616     int ret;
617     BdrvCheckResult result;
618 
619     ret = bdrv_check(bs, &result, fix);
620     if (ret < 0) {
621         return ret;
622     }
623 
624     check->filename                 = g_strdup(filename);
625     check->format                   = g_strdup(bdrv_get_format_name(bs));
626     check->check_errors             = result.check_errors;
627     check->corruptions              = result.corruptions;
628     check->has_corruptions          = result.corruptions != 0;
629     check->leaks                    = result.leaks;
630     check->has_leaks                = result.leaks != 0;
631     check->corruptions_fixed        = result.corruptions_fixed;
632     check->has_corruptions_fixed    = result.corruptions_fixed != 0;
633     check->leaks_fixed              = result.leaks_fixed;
634     check->has_leaks_fixed          = result.leaks_fixed != 0;
635     check->image_end_offset         = result.image_end_offset;
636     check->has_image_end_offset     = result.image_end_offset != 0;
637     check->total_clusters           = result.bfi.total_clusters;
638     check->has_total_clusters       = result.bfi.total_clusters != 0;
639     check->allocated_clusters       = result.bfi.allocated_clusters;
640     check->has_allocated_clusters   = result.bfi.allocated_clusters != 0;
641     check->fragmented_clusters      = result.bfi.fragmented_clusters;
642     check->has_fragmented_clusters  = result.bfi.fragmented_clusters != 0;
643     check->compressed_clusters      = result.bfi.compressed_clusters;
644     check->has_compressed_clusters  = result.bfi.compressed_clusters != 0;
645 
646     return 0;
647 }
648 
649 /*
650  * Checks an image for consistency. Exit codes:
651  *
652  *  0 - Check completed, image is good
653  *  1 - Check not completed because of internal errors
654  *  2 - Check completed, image is corrupted
655  *  3 - Check completed, image has leaked clusters, but is good otherwise
656  * 63 - Checks are not supported by the image format
657  */
658 static int img_check(const img_cmd_t *ccmd, int argc, char **argv)
659 {
660     int c, ret;
661     OutputFormat output_format = OFORMAT_HUMAN;
662     const char *filename, *fmt, *cache;
663     BlockBackend *blk;
664     BlockDriverState *bs;
665     int fix = 0;
666     int flags = BDRV_O_CHECK;
667     bool writethrough;
668     ImageCheck *check;
669     bool quiet = false;
670     bool image_opts = false;
671     bool force_share = false;
672 
673     fmt = NULL;
674     cache = BDRV_DEFAULT_CACHE;
675 
676     for(;;) {
677         int option_index = 0;
678         static const struct option long_options[] = {
679             {"help", no_argument, 0, 'h'},
680             {"format", required_argument, 0, 'f'},
681             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
682             {"cache", required_argument, 0, 'T'},
683             {"repair", required_argument, 0, 'r'},
684             {"force-share", no_argument, 0, 'U'},
685             {"output", required_argument, 0, OPTION_OUTPUT},
686             {"quiet", no_argument, 0, 'q'},
687             {"object", required_argument, 0, OPTION_OBJECT},
688             {0, 0, 0, 0}
689         };
690         c = getopt_long(argc, argv, "hf:T:r:Uq",
691                         long_options, &option_index);
692         if (c == -1) {
693             break;
694         }
695         switch(c) {
696         case 'h':
697             cmd_help(ccmd, "[-f FMT | --image-opts] [-T CACHE_MODE] [-r leaks|all]\n"
698 "        [-U] [--output human|json] [-q] [--object OBJDEF] FILE\n"
699 ,
700 "  -f, --format FMT\n"
701 "     specifies the format of the image explicitly (default: probing is used)\n"
702 "  --image-opts\n"
703 "     treat FILE as an option string (key=value,..), not a file name\n"
704 "     (incompatible with -f|--format)\n"
705 "  -T, --cache CACHE_MODE\n" /* why not -t ? */
706 "     cache mode (default: " BDRV_DEFAULT_CACHE ")\n"
707 "  -r, --repair leaks|all\n"
708 "     repair errors of the given category in the image (image will be\n"
709 "     opened in read-write mode, incompatible with -U|--force-share)\n"
710 "  -U, --force-share\n"
711 "     open image in shared mode for concurrent access\n"
712 "  --output human|json\n"
713 "     output format (default: human)\n"
714 "  -q, --quiet\n"
715 "     quiet mode (produce only error messages if any)\n"
716 "  --object OBJDEF\n"
717 "     defines QEMU user-creatable object\n"
718 "  FILE\n"
719 "     name of the image file, or an option string (key=value,..)\n"
720 "     with --image-opts, to operate on\n"
721 );
722             break;
723         case 'f':
724             fmt = optarg;
725             break;
726         case OPTION_IMAGE_OPTS:
727             image_opts = true;
728             break;
729         case 'T':
730             cache = optarg;
731             break;
732         case 'r':
733             flags |= BDRV_O_RDWR;
734 
735             if (!strcmp(optarg, "leaks")) {
736                 fix = BDRV_FIX_LEAKS;
737             } else if (!strcmp(optarg, "all")) {
738                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
739             } else {
740                 error_exit(argv[0],
741                            "--repair (-r) expects 'leaks' or 'all', not '%s'",
742                            optarg);
743             }
744             break;
745         case 'U':
746             force_share = true;
747             break;
748         case OPTION_OUTPUT:
749             output_format = parse_output_format(argv[0], optarg);
750             break;
751         case 'q':
752             quiet = true;
753             break;
754         case OPTION_OBJECT:
755             user_creatable_process_cmdline(optarg);
756             break;
757         default:
758             tryhelp(argv[0]);
759         }
760     }
761     if (optind != argc - 1) {
762         error_exit(argv[0], "Expecting one image file name");
763     }
764     filename = argv[optind++];
765 
766     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
767     if (ret < 0) {
768         error_report("Invalid source cache option: %s", cache);
769         return 1;
770     }
771 
772     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
773                    force_share);
774     if (!blk) {
775         return 1;
776     }
777     bs = blk_bs(blk);
778 
779     check = g_new0(ImageCheck, 1);
780     ret = collect_image_check(bs, check, filename, fmt, fix);
781 
782     if (ret == -ENOTSUP) {
783         error_report("This image format does not support checks");
784         ret = 63;
785         goto fail;
786     }
787 
788     if (check->corruptions_fixed || check->leaks_fixed) {
789         int corruptions_fixed, leaks_fixed;
790         bool has_leaks_fixed, has_corruptions_fixed;
791 
792         leaks_fixed         = check->leaks_fixed;
793         has_leaks_fixed     = check->has_leaks_fixed;
794         corruptions_fixed   = check->corruptions_fixed;
795         has_corruptions_fixed = check->has_corruptions_fixed;
796 
797         if (output_format == OFORMAT_HUMAN) {
798             qprintf(quiet,
799                     "The following inconsistencies were found and repaired:\n\n"
800                     "    %" PRId64 " leaked clusters\n"
801                     "    %" PRId64 " corruptions\n\n"
802                     "Double checking the fixed image now...\n",
803                     check->leaks_fixed,
804                     check->corruptions_fixed);
805         }
806 
807         qapi_free_ImageCheck(check);
808         check = g_new0(ImageCheck, 1);
809         ret = collect_image_check(bs, check, filename, fmt, 0);
810 
811         check->leaks_fixed          = leaks_fixed;
812         check->has_leaks_fixed      = has_leaks_fixed;
813         check->corruptions_fixed    = corruptions_fixed;
814         check->has_corruptions_fixed = has_corruptions_fixed;
815     }
816 
817     if (!ret) {
818         switch (output_format) {
819         case OFORMAT_HUMAN:
820             dump_human_image_check(check, quiet);
821             break;
822         case OFORMAT_JSON:
823             dump_json_image_check(check, quiet);
824             break;
825         }
826     }
827 
828     if (ret || check->check_errors) {
829         if (ret) {
830             error_report("Check failed: %s", strerror(-ret));
831         } else {
832             error_report("Check failed");
833         }
834         ret = 1;
835         goto fail;
836     }
837 
838     if (check->corruptions) {
839         ret = 2;
840     } else if (check->leaks) {
841         ret = 3;
842     } else {
843         ret = 0;
844     }
845 
846 fail:
847     qapi_free_ImageCheck(check);
848     blk_unref(blk);
849     return ret;
850 }
851 
/*
 * Opaque data handed to common_block_job_cb().
 */
typedef struct CommonBlockJobCBInfo {
    /* Not read by common_block_job_cb(); presumably set by the job's creator */
    BlockDriverState *bs;
    /* Where common_block_job_cb() stores the error of a failed job */
    Error **errp;
} CommonBlockJobCBInfo;
856 
857 static void common_block_job_cb(void *opaque, int ret)
858 {
859     CommonBlockJobCBInfo *cbi = opaque;
860 
861     if (ret < 0) {
862         error_setg_errno(cbi->errp, -ret, "Block job failed");
863     }
864 }
865 
/*
 * Drive @job until it has finished, printing progress along the way.
 *
 * The job's AioContext is polled until the job is either ready or
 * completed.  A job that is not yet completed at that point is finished
 * with job_complete_sync_locked(), which receives @errp; otherwise the
 * job's stored return value is used.  A final 100% progress report is
 * printed only when that result is 0.
 *
 * The function itself returns nothing; in this function @errp is only
 * passed to job_complete_sync_locked().
 */
static void run_block_job(BlockJob *job, Error **errp)
{
    uint64_t progress_current, progress_total;
    AioContext *aio_context = block_job_get_aio_context(job);
    int ret = 0;

    job_lock();
    /* Hold our own reference for as long as we look at the job */
    job_ref_locked(&job->job);
    do {
        float progress = 0.0f;
        /* The job lock is dropped around the blocking poll */
        job_unlock();
        aio_poll(aio_context, true);

        progress_get_snapshot(&job->job.progress, &progress_current,
                              &progress_total);
        /* Leave the percentage at 0 while the total is still 0 */
        if (progress_total) {
            progress = (float)progress_current / progress_total * 100.f;
        }
        qemu_progress_print(progress, 0);
        /* Re-take the lock: the *_locked() checks below require it */
        job_lock();
    } while (!job_is_ready_locked(&job->job) &&
             !job_is_completed_locked(&job->job));

    if (!job_is_completed_locked(&job->job)) {
        /* Loop ended because the job became ready: complete it now */
        ret = job_complete_sync_locked(&job->job, errp);
    } else {
        ret = job->job.ret;
    }
    job_unref_locked(&job->job);
    job_unlock();

    /* publish completion progress only when success */
    if (!ret) {
        qemu_progress_print(100.f, 0);
    }
}
902 
903 static int img_commit(const img_cmd_t *ccmd, int argc, char **argv)
904 {
905     int c, ret, flags;
906     const char *filename, *fmt, *cache, *base;
907     BlockBackend *blk;
908     BlockDriverState *bs, *base_bs;
909     BlockJob *job;
910     bool progress = false, quiet = false, drop = false;
911     bool writethrough;
912     Error *local_err = NULL;
913     CommonBlockJobCBInfo cbi;
914     bool image_opts = false;
915     int64_t rate_limit = 0;
916 
917     fmt = NULL;
918     cache = BDRV_DEFAULT_CACHE;
919     base = NULL;
920     for(;;) {
921         static const struct option long_options[] = {
922             {"help", no_argument, 0, 'h'},
923             {"format", required_argument, 0, 'f'},
924             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
925             {"cache", required_argument, 0, 't'},
926             {"drop", no_argument, 0, 'd'},
927             {"base", required_argument, 0, 'b'},
928             {"rate-limit", required_argument, 0, 'r'},
929             {"progress", no_argument, 0, 'p'},
930             {"quiet", no_argument, 0, 'q'},
931             {"object", required_argument, 0, OPTION_OBJECT},
932             {0, 0, 0, 0}
933         };
934         c = getopt_long(argc, argv, "hf:t:db:r:pq",
935                         long_options, NULL);
936         if (c == -1) {
937             break;
938         }
939         switch(c) {
940         case 'h':
941             cmd_help(ccmd, "[-f FMT | --image-opts] [-t CACHE_MODE] [-b BASE_IMG]\n"
942 "        [-d] [-r RATE] [-q] [--object OBJDEF] FILE\n"
943 ,
944 "  -f, --format FMT\n"
945 "     specify FILE image format explicitly (default: probing is used)\n"
946 "  --image-opts\n"
947 "     treat FILE as an option string (key=value,..), not a file name\n"
948 "     (incompatible with -f|--format)\n"
949 "  -t, --cache CACHE_MODE image cache mode (default: " BDRV_DEFAULT_CACHE ")\n"
950 "  -d, --drop\n"
951 "     skip emptying FILE on completion\n"
952 "  -b, --base BASE_IMG\n"
953 "     image in the backing chain to commit change to\n"
954 "     (default: immediate backing file; implies --drop)\n"
955 "  -r, --rate-limit RATE\n"
956 "     I/O rate limit, in bytes per second\n"
957 "  -p, --progress\n"
958 "     display progress information\n"
959 "  -q, --quiet\n"
960 "     quiet mode (produce only error messages if any)\n"
961 "  --object OBJDEF\n"
962 "     defines QEMU user-creatable object\n"
963 "  FILE\n"
964 "     name of the image file, or an option string (key=value,..)\n"
965 "     with --image-opts, to operate on\n"
966 );
967             break;
968         case 'f':
969             fmt = optarg;
970             break;
971         case OPTION_IMAGE_OPTS:
972             image_opts = true;
973             break;
974         case 't':
975             cache = optarg;
976             break;
977         case 'd':
978             drop = true;
979             break;
980         case 'b':
981             base = optarg;
982             /* -b implies -d */
983             drop = true;
984             break;
985         case 'r':
986             rate_limit = cvtnum("rate limit", optarg, true);
987             if (rate_limit < 0) {
988                 return 1;
989             }
990             break;
991         case 'p':
992             progress = true;
993             break;
994         case 'q':
995             quiet = true;
996             break;
997         case OPTION_OBJECT:
998             user_creatable_process_cmdline(optarg);
999             break;
1000         default:
1001             tryhelp(argv[0]);
1002         }
1003     }
1004 
1005     /* Progress is not shown in Quiet mode */
1006     if (quiet) {
1007         progress = false;
1008     }
1009 
1010     if (optind != argc - 1) {
1011         error_exit(argv[0], "Expecting one image file name");
1012     }
1013     filename = argv[optind++];
1014 
1015     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
1016     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1017     if (ret < 0) {
1018         error_report("Invalid cache option: %s", cache);
1019         return 1;
1020     }
1021 
1022     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1023                    false);
1024     if (!blk) {
1025         return 1;
1026     }
1027     bs = blk_bs(blk);
1028 
1029     qemu_progress_init(progress, 1.f);
1030     qemu_progress_print(0.f, 100);
1031 
1032     bdrv_graph_rdlock_main_loop();
1033     if (base) {
1034         base_bs = bdrv_find_backing_image(bs, base);
1035         if (!base_bs) {
1036             error_setg(&local_err,
1037                        "Did not find '%s' in the backing chain of '%s'",
1038                        base, filename);
1039             bdrv_graph_rdunlock_main_loop();
1040             goto done;
1041         }
1042     } else {
1043         /* This is different from QMP, which by default uses the deepest file in
1044          * the backing chain (i.e., the very base); however, the traditional
1045          * behavior of qemu-img commit is using the immediate backing file. */
1046         base_bs = bdrv_backing_chain_next(bs);
1047         if (!base_bs) {
1048             error_setg(&local_err, "Image does not have a backing file");
1049             bdrv_graph_rdunlock_main_loop();
1050             goto done;
1051         }
1052     }
1053     bdrv_graph_rdunlock_main_loop();
1054 
1055     cbi = (CommonBlockJobCBInfo){
1056         .errp = &local_err,
1057         .bs   = bs,
1058     };
1059 
1060     commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit,
1061                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1062                         &cbi, false, &local_err);
1063     if (local_err) {
1064         goto done;
1065     }
1066 
1067     /* When the block job completes, the BlockBackend reference will point to
1068      * the old backing file. In order to avoid that the top image is already
1069      * deleted, so we can still empty it afterwards, increment the reference
1070      * counter here preemptively. */
1071     if (!drop) {
1072         bdrv_ref(bs);
1073     }
1074 
1075     job = block_job_get("commit");
1076     assert(job);
1077     run_block_job(job, &local_err);
1078     if (local_err) {
1079         goto unref_backing;
1080     }
1081 
1082     if (!drop) {
1083         BlockBackend *old_backing_blk;
1084 
1085         old_backing_blk = blk_new_with_bs(bs, BLK_PERM_WRITE, BLK_PERM_ALL,
1086                                           &local_err);
1087         if (!old_backing_blk) {
1088             goto unref_backing;
1089         }
1090         ret = blk_make_empty(old_backing_blk, &local_err);
1091         blk_unref(old_backing_blk);
1092         if (ret == -ENOTSUP) {
1093             error_free(local_err);
1094             local_err = NULL;
1095         } else if (ret < 0) {
1096             goto unref_backing;
1097         }
1098     }
1099 
1100 unref_backing:
1101     if (!drop) {
1102         bdrv_unref(bs);
1103     }
1104 
1105 done:
1106     qemu_progress_end();
1107 
1108     /*
1109      * Manually inactivate the image first because this way we can know whether
1110      * an error occurred. blk_unref() doesn't tell us about failures.
1111      */
1112     ret = bdrv_inactivate_all();
1113     if (ret < 0 && !local_err) {
1114         error_setg_errno(&local_err, -ret, "Error while closing the image");
1115     }
1116     blk_unref(blk);
1117 
1118     if (local_err) {
1119         error_report_err(local_err);
1120         return 1;
1121     }
1122 
1123     qprintf(quiet, "Image committed.\n");
1124     return 0;
1125 }
1126 
1127 /*
1128  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1129  * of the first sector boundary within buf where the sector contains a
1130  * non-zero byte.  This function is robust to a buffer that is not
1131  * sector-aligned.
1132  */
1133 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1134 {
1135     int64_t i;
1136     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1137 
1138     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1139         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1140             return i;
1141         }
1142     }
1143     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1144         return i;
1145     }
1146     return -1;
1147 }
1148 
1149 /*
1150  * Returns true iff the first sector pointed to by 'buf' contains at least
1151  * a non-NUL byte.
1152  *
1153  * 'pnum' is set to the number of sectors (including and immediately following
1154  * the first one) that are known to be in the same allocated/unallocated state.
1155  * The function will try to align the end offset to alignment boundaries so
1156  * that the request will at least end aligned and consecutive requests will
1157  * also start at an aligned offset.
1158  */
1159 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1160                                 int64_t sector_num, int alignment)
1161 {
1162     bool is_zero;
1163     int i, tail;
1164 
1165     if (n <= 0) {
1166         *pnum = 0;
1167         return 0;
1168     }
1169     is_zero = buffer_is_zero(buf, BDRV_SECTOR_SIZE);
1170     for(i = 1; i < n; i++) {
1171         buf += BDRV_SECTOR_SIZE;
1172         if (is_zero != buffer_is_zero(buf, BDRV_SECTOR_SIZE)) {
1173             break;
1174         }
1175     }
1176 
1177     if (i == n) {
1178         /*
1179          * The whole buf is the same.
1180          * No reason to split it into chunks, so return now.
1181          */
1182         *pnum = i;
1183         return !is_zero;
1184     }
1185 
1186     tail = (sector_num + i) & (alignment - 1);
1187     if (tail) {
1188         if (is_zero && i <= tail) {
1189             /*
1190              * For sure next sector after i is data, and it will rewrite this
1191              * tail anyway due to RMW. So, let's just write data now.
1192              */
1193             is_zero = false;
1194         }
1195         if (!is_zero) {
1196             /* If possible, align up end offset of allocated areas. */
1197             i += alignment - tail;
1198             i = MIN(i, n);
1199         } else {
1200             /*
1201              * For sure next sector after i is data, and it will rewrite this
1202              * tail anyway due to RMW. Better is avoid RMW and write zeroes up
1203              * to aligned bound.
1204              */
1205             i -= tail;
1206         }
1207     }
1208     *pnum = i;
1209     return !is_zero;
1210 }
1211 
1212 /*
1213  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1214  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1215  * breaking up write requests for only small sparse areas.
1216  */
1217 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1218     int min, int64_t sector_num, int alignment)
1219 {
1220     int ret;
1221     int num_checked, num_used;
1222 
1223     if (n < min) {
1224         min = n;
1225     }
1226 
1227     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1228     if (!ret) {
1229         return ret;
1230     }
1231 
1232     num_used = *pnum;
1233     buf += BDRV_SECTOR_SIZE * *pnum;
1234     n -= *pnum;
1235     sector_num += *pnum;
1236     num_checked = num_used;
1237 
1238     while (n > 0) {
1239         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1240 
1241         buf += BDRV_SECTOR_SIZE * *pnum;
1242         n -= *pnum;
1243         sector_num += *pnum;
1244         num_checked += *pnum;
1245         if (ret) {
1246             num_used = num_checked;
1247         } else if (*pnum >= min) {
1248             break;
1249         }
1250     }
1251 
1252     *pnum = num_used;
1253     return 1;
1254 }
1255 
1256 /*
1257  * Compares two buffers chunk by chunk, where @chsize is the chunk size.
1258  * If @chsize is 0, default chunk size of BDRV_SECTOR_SIZE is used.
1259  * Returns 0 if the first chunk of each buffer matches, non-zero otherwise.
1260  *
1261  * @pnum is set to the size of the buffer prefix aligned to @chsize that
1262  * has the same matching status as the first chunk.
1263  */
1264 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1265                            int64_t bytes, uint64_t chsize, int64_t *pnum)
1266 {
1267     bool res;
1268     int64_t i;
1269 
1270     assert(bytes > 0);
1271 
1272     if (!chsize) {
1273         chsize = BDRV_SECTOR_SIZE;
1274     }
1275     i = MIN(bytes, chsize);
1276 
1277     res = !!memcmp(buf1, buf2, i);
1278     while (i < bytes) {
1279         int64_t len = MIN(bytes - i, chsize);
1280 
1281         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1282             break;
1283         }
1284         i += len;
1285     }
1286 
1287     *pnum = i;
1288     return res;
1289 }
1290 
/*
 * I/O buffer size in bytes.  img_compare() allocates its two comparison
 * buffers with this size and caps every read it issues at this length.
 */
#define IO_BUF_SIZE (2 * MiB)
1292 
1293 /*
1294  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1295  *
1296  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1297  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1298  * failure), and 4 on error (the exit status for read errors), after emitting
1299  * an error message.
1300  *
1301  * @param blk:  BlockBackend for the image
1302  * @param offset: Starting offset to check
1303  * @param bytes: Number of bytes to check
1304  * @param filename: Name of disk file we are checking (logging purpose)
1305  * @param buffer: Allocated buffer for storing read data
1306  * @param quiet: Flag for quiet mode
1307  */
1308 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1309                                int64_t bytes, const char *filename,
1310                                uint8_t *buffer, bool quiet)
1311 {
1312     int ret = 0;
1313     int64_t idx;
1314 
1315     ret = blk_pread(blk, offset, bytes, buffer, 0);
1316     if (ret < 0) {
1317         error_report("Error while reading offset %" PRId64 " of %s: %s",
1318                      offset, filename, strerror(-ret));
1319         return 4;
1320     }
1321     idx = find_nonzero(buffer, bytes);
1322     if (idx >= 0) {
1323         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1324                 offset + idx);
1325         return 1;
1326     }
1327 
1328     return 0;
1329 }
1330 
1331 /*
1332  * Compares two images. Exit codes:
1333  *
1334  * 0 - Images are identical or the requested help was printed
1335  * 1 - Images differ
1336  * >1 - Error occurred
1337  */
1338 static int img_compare(const img_cmd_t *ccmd, int argc, char **argv)
1339 {
1340     const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
1341     BlockBackend *blk1, *blk2;
1342     BlockDriverState *bs1, *bs2;
1343     int64_t total_size1, total_size2;
1344     uint8_t *buf1 = NULL, *buf2 = NULL;
1345     int64_t pnum1, pnum2;
1346     int allocated1, allocated2;
1347     int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
1348     bool progress = false, quiet = false, strict = false;
1349     int flags;
1350     bool writethrough;
1351     int64_t total_size;
1352     int64_t offset = 0;
1353     int64_t chunk;
1354     int c;
1355     uint64_t progress_base;
1356     bool image_opts = false;
1357     bool force_share = false;
1358 
1359     cache = BDRV_DEFAULT_CACHE;
1360     for (;;) {
1361         static const struct option long_options[] = {
1362             {"help", no_argument, 0, 'h'},
1363             {"a-format", required_argument, 0, 'f'},
1364             {"b-format", required_argument, 0, 'F'},
1365             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
1366             {"strict", no_argument, 0, 's'},
1367             {"cache", required_argument, 0, 'T'},
1368             {"force-share", no_argument, 0, 'U'},
1369             {"progress", no_argument, 0, 'p'},
1370             {"quiet", no_argument, 0, 'q'},
1371             {"object", required_argument, 0, OPTION_OBJECT},
1372             {0, 0, 0, 0}
1373         };
1374         c = getopt_long(argc, argv, "hf:F:sT:Upq",
1375                         long_options, NULL);
1376         if (c == -1) {
1377             break;
1378         }
1379         switch (c) {
1380         case 'h':
1381             cmd_help(ccmd,
1382 "[[-f FMT] [-F FMT] | --image-opts] [-s] [-T CACHE]\n"
1383 "        [-U] [-p] [-q] [--object OBJDEF] FILE1 FILE2\n"
1384 ,
1385 "  -f, --a-format FMT\n"
1386 "     specify FILE1 image format explicitly (default: probing is used)\n"
1387 "  -F, --b-format FMT\n"
1388 "     specify FILE2 image format explicitly (default: probing is used)\n"
1389 "  --image-opts\n"
1390 "     treat FILE1 and FILE2 as option strings (key=value,..), not file names\n"
1391 "     (incompatible with -f|--a-format and -F|--b-format)\n"
1392 "  -s, --strict\n"
1393 "     strict mode, also check if sizes are equal\n"
1394 "  -T, --cache CACHE_MODE\n"
1395 "     images caching mode (default: " BDRV_DEFAULT_CACHE ")\n"
1396 "  -U, --force-share\n"
1397 "     open images in shared mode for concurrent access\n"
1398 "  -p, --progress\n"
1399 "     display progress information\n"
1400 "  -q, --quiet\n"
1401 "     quiet mode (produce only error messages if any)\n"
1402 "  --object OBJDEF\n"
1403 "     defines QEMU user-creatable object\n"
1404 "  FILE1, FILE2\n"
1405 "     names of the image files, or option strings (key=value,..)\n"
1406 "     with --image-opts, to compare\n"
1407 );
1408             break;
1409         case 'f':
1410             fmt1 = optarg;
1411             break;
1412         case 'F':
1413             fmt2 = optarg;
1414             break;
1415         case OPTION_IMAGE_OPTS:
1416             image_opts = true;
1417             break;
1418         case 's':
1419             strict = true;
1420             break;
1421         case 'T':
1422             cache = optarg;
1423             break;
1424         case 'U':
1425             force_share = true;
1426             break;
1427         case 'p':
1428             progress = true;
1429             break;
1430         case 'q':
1431             quiet = true;
1432             break;
1433         case OPTION_OBJECT:
1434             user_creatable_process_cmdline(optarg);
1435             break;
1436         default:
1437             tryhelp(argv[0]);
1438         }
1439     }
1440 
1441     /* Progress is not shown in Quiet mode */
1442     if (quiet) {
1443         progress = false;
1444     }
1445 
1446 
1447     if (optind != argc - 2) {
1448         error_exit(argv[0], "Expecting two image file names");
1449     }
1450     filename1 = argv[optind++];
1451     filename2 = argv[optind++];
1452 
1453     /* Initialize before goto out */
1454     qemu_progress_init(progress, 2.0);
1455 
1456     flags = 0;
1457     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1458     if (ret < 0) {
1459         error_report("Invalid source cache option: %s", cache);
1460         ret = 2;
1461         goto out3;
1462     }
1463 
1464     blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
1465                     force_share);
1466     if (!blk1) {
1467         ret = 2;
1468         goto out3;
1469     }
1470 
1471     blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
1472                     force_share);
1473     if (!blk2) {
1474         ret = 2;
1475         goto out2;
1476     }
1477     bs1 = blk_bs(blk1);
1478     bs2 = blk_bs(blk2);
1479 
1480     buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
1481     buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
1482     total_size1 = blk_getlength(blk1);
1483     if (total_size1 < 0) {
1484         error_report("Can't get size of %s: %s",
1485                      filename1, strerror(-total_size1));
1486         ret = 4;
1487         goto out;
1488     }
1489     total_size2 = blk_getlength(blk2);
1490     if (total_size2 < 0) {
1491         error_report("Can't get size of %s: %s",
1492                      filename2, strerror(-total_size2));
1493         ret = 4;
1494         goto out;
1495     }
1496     total_size = MIN(total_size1, total_size2);
1497     progress_base = MAX(total_size1, total_size2);
1498 
1499     qemu_progress_print(0, 100);
1500 
1501     if (strict && total_size1 != total_size2) {
1502         ret = 1;
1503         qprintf(quiet, "Strict mode: Image size mismatch!\n");
1504         goto out;
1505     }
1506 
1507     while (offset < total_size) {
1508         int status1, status2;
1509 
1510         status1 = bdrv_block_status_above(bs1, NULL, offset,
1511                                           total_size1 - offset, &pnum1, NULL,
1512                                           NULL);
1513         if (status1 < 0) {
1514             ret = 3;
1515             error_report("Sector allocation test failed for %s", filename1);
1516             goto out;
1517         }
1518         allocated1 = status1 & BDRV_BLOCK_ALLOCATED;
1519 
1520         status2 = bdrv_block_status_above(bs2, NULL, offset,
1521                                           total_size2 - offset, &pnum2, NULL,
1522                                           NULL);
1523         if (status2 < 0) {
1524             ret = 3;
1525             error_report("Sector allocation test failed for %s", filename2);
1526             goto out;
1527         }
1528         allocated2 = status2 & BDRV_BLOCK_ALLOCATED;
1529 
1530         assert(pnum1 && pnum2);
1531         chunk = MIN(pnum1, pnum2);
1532 
1533         if (strict) {
1534             if (status1 != status2) {
1535                 ret = 1;
1536                 qprintf(quiet, "Strict mode: Offset %" PRId64
1537                         " block status mismatch!\n", offset);
1538                 goto out;
1539             }
1540         }
1541         if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
1542             /* nothing to do */
1543         } else if (allocated1 == allocated2) {
1544             if (allocated1) {
1545                 int64_t pnum;
1546 
1547                 chunk = MIN(chunk, IO_BUF_SIZE);
1548                 ret = blk_pread(blk1, offset, chunk, buf1, 0);
1549                 if (ret < 0) {
1550                     error_report("Error while reading offset %" PRId64
1551                                  " of %s: %s",
1552                                  offset, filename1, strerror(-ret));
1553                     ret = 4;
1554                     goto out;
1555                 }
1556                 ret = blk_pread(blk2, offset, chunk, buf2, 0);
1557                 if (ret < 0) {
1558                     error_report("Error while reading offset %" PRId64
1559                                  " of %s: %s",
1560                                  offset, filename2, strerror(-ret));
1561                     ret = 4;
1562                     goto out;
1563                 }
1564                 ret = compare_buffers(buf1, buf2, chunk, 0, &pnum);
1565                 if (ret || pnum != chunk) {
1566                     qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1567                             offset + (ret ? 0 : pnum));
1568                     ret = 1;
1569                     goto out;
1570                 }
1571             }
1572         } else {
1573             chunk = MIN(chunk, IO_BUF_SIZE);
1574             if (allocated1) {
1575                 ret = check_empty_sectors(blk1, offset, chunk,
1576                                           filename1, buf1, quiet);
1577             } else {
1578                 ret = check_empty_sectors(blk2, offset, chunk,
1579                                           filename2, buf1, quiet);
1580             }
1581             if (ret) {
1582                 goto out;
1583             }
1584         }
1585         offset += chunk;
1586         qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1587     }
1588 
1589     if (total_size1 != total_size2) {
1590         BlockBackend *blk_over;
1591         const char *filename_over;
1592 
1593         qprintf(quiet, "Warning: Image size mismatch!\n");
1594         if (total_size1 > total_size2) {
1595             blk_over = blk1;
1596             filename_over = filename1;
1597         } else {
1598             blk_over = blk2;
1599             filename_over = filename2;
1600         }
1601 
1602         while (offset < progress_base) {
1603             ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
1604                                           progress_base - offset, &chunk,
1605                                           NULL, NULL);
1606             if (ret < 0) {
1607                 ret = 3;
1608                 error_report("Sector allocation test failed for %s",
1609                              filename_over);
1610                 goto out;
1611 
1612             }
1613             if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
1614                 chunk = MIN(chunk, IO_BUF_SIZE);
1615                 ret = check_empty_sectors(blk_over, offset, chunk,
1616                                           filename_over, buf1, quiet);
1617                 if (ret) {
1618                     goto out;
1619                 }
1620             }
1621             offset += chunk;
1622             qemu_progress_print(((float) chunk / progress_base) * 100, 100);
1623         }
1624     }
1625 
1626     qprintf(quiet, "Images are identical.\n");
1627     ret = 0;
1628 
1629 out:
1630     qemu_vfree(buf1);
1631     qemu_vfree(buf2);
1632     blk_unref(blk2);
1633 out2:
1634     blk_unref(blk1);
1635 out3:
1636     qemu_progress_end();
1637     return ret;
1638 }
1639 
1640 /* Convenience wrapper around qmp_block_dirty_bitmap_merge */
1641 static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name,
1642                                   const char *src_node, const char *src_name,
1643                                   Error **errp)
1644 {
1645     BlockDirtyBitmapOrStr *merge_src;
1646     BlockDirtyBitmapOrStrList *list = NULL;
1647 
1648     merge_src = g_new0(BlockDirtyBitmapOrStr, 1);
1649     merge_src->type = QTYPE_QDICT;
1650     merge_src->u.external.node = g_strdup(src_node);
1651     merge_src->u.external.name = g_strdup(src_name);
1652     QAPI_LIST_PREPEND(list, merge_src);
1653     qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp);
1654     qapi_free_BlockDirtyBitmapOrStrList(list);
1655 }
1656 
/*
 * How a range of source sectors is to be treated during conversion.
 * convert_iteration_sectors() derives the value from the source's block
 * status and stores it in ImgConvertState.status.
 */
enum ImgConvertBlockStatus {
    /* The range has to be handled as data */
    BLK_DATA,
    /* The block status reported the range as zero */
    BLK_ZERO,
    /*
     * Leave the range unallocated in the target so that the target's
     * backing file is visible at that offset
     */
    BLK_BACKING_FILE,
};
1662 
/* Capacity of the per-coroutine arrays (co[], wait_sector_num[]) in ImgConvertState */
#define MAX_COROUTINES 16
/*
 * NOTE(review): presumably the name of the throttle group used by convert;
 * its users are not visible in this part of the file -- confirm.
 */
#define CONVERT_THROTTLE_GROUP "img_convert"
1665 
/*
 * State shared by the helpers of the convert operation.  Only fields whose
 * use is visible in the helpers below are commented.
 */
typedef struct ImgConvertState {
    /* Source images; the sources are treated as one concatenated image */
    BlockBackend **src;
    /* Length of each source, in sectors */
    int64_t *src_sectors;
    /* Per-source alignment (in sectors) used to trim block status ranges */
    int *src_alignment;
    /* Number of entries in the src arrays */
    int src_num;
    /* Upper bound for the sector numbers processed (all sources together) */
    int64_t total_sectors;
    int64_t allocated_sectors;
    int64_t allocated_done;
    int64_t sector_num;
    int64_t wr_offs;
    /* Classification of the sectors below sector_next_status */
    enum ImgConvertBlockStatus status;
    /* First sector for which the block status has to be queried again */
    int64_t sector_next_status;
    BlockBackend *target;
    bool has_zero_init;
    /* Writes are compressed and must cover complete clusters */
    bool compressed;
    bool target_is_new;
    /* Unallocated source ranges are left to the target's backing file */
    bool target_has_backing;
    int64_t target_backing_sectors; /* negative if unknown */
    bool wr_in_order;
    bool copy_range;
    /* Keep going on read/block status errors instead of failing */
    bool salvage;
    /* Suppress the warnings printed in salvage mode */
    bool quiet;
    int min_sparse;
    int alignment;
    /* Cluster size in sectors, used for compressed writes */
    size_t cluster_sectors;
    /* Largest number of sectors handled in one data request */
    size_t buf_sectors;
    long num_coroutines;
    int running_coroutines;
    Coroutine *co[MAX_COROUTINES];
    int64_t wait_sector_num[MAX_COROUTINES];
    CoMutex lock;
    int ret;
} ImgConvertState;
1699 
1700 static void convert_select_part(ImgConvertState *s, int64_t sector_num,
1701                                 int *src_cur, int64_t *src_cur_offset)
1702 {
1703     *src_cur = 0;
1704     *src_cur_offset = 0;
1705     while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) {
1706         *src_cur_offset += s->src_sectors[*src_cur];
1707         (*src_cur)++;
1708         assert(*src_cur < s->src_num);
1709     }
1710 }
1711 
1712 static int coroutine_mixed_fn GRAPH_RDLOCK
1713 convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
1714 {
1715     int64_t src_cur_offset;
1716     int ret, n, src_cur;
1717     bool post_backing_zero = false;
1718 
1719     convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1720 
1721     assert(s->total_sectors > sector_num);
1722     n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
1723 
1724     if (s->target_backing_sectors >= 0) {
1725         if (sector_num >= s->target_backing_sectors) {
1726             post_backing_zero = true;
1727         } else if (sector_num + n > s->target_backing_sectors) {
1728             /* Split requests around target_backing_sectors (because
1729              * starting from there, zeros are handled differently) */
1730             n = s->target_backing_sectors - sector_num;
1731         }
1732     }
1733 
1734     if (s->sector_next_status <= sector_num) {
1735         uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
1736         int64_t count;
1737         int tail;
1738         BlockDriverState *src_bs = blk_bs(s->src[src_cur]);
1739         BlockDriverState *base;
1740 
1741         if (s->target_has_backing) {
1742             base = bdrv_cow_bs(bdrv_skip_filters(src_bs));
1743         } else {
1744             base = NULL;
1745         }
1746 
1747         do {
1748             count = n * BDRV_SECTOR_SIZE;
1749 
1750             ret = bdrv_block_status_above(src_bs, base, offset, count, &count,
1751                                           NULL, NULL);
1752 
1753             if (ret < 0) {
1754                 if (s->salvage) {
1755                     if (n == 1) {
1756                         if (!s->quiet) {
1757                             warn_report("error while reading block status at "
1758                                         "offset %" PRIu64 ": %s", offset,
1759                                         strerror(-ret));
1760                         }
1761                         /* Just try to read the data, then */
1762                         ret = BDRV_BLOCK_DATA;
1763                         count = BDRV_SECTOR_SIZE;
1764                     } else {
1765                         /* Retry on a shorter range */
1766                         n = DIV_ROUND_UP(n, 4);
1767                     }
1768                 } else {
1769                     error_report("error while reading block status at offset "
1770                                  "%" PRIu64 ": %s", offset, strerror(-ret));
1771                     return ret;
1772                 }
1773             }
1774         } while (ret < 0);
1775 
1776         n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
1777 
1778         /*
1779          * Avoid that s->sector_next_status becomes unaligned to the source
1780          * request alignment and/or cluster size to avoid unnecessary read
1781          * cycles.
1782          */
1783         tail = (sector_num - src_cur_offset + n) % s->src_alignment[src_cur];
1784         if (n > tail) {
1785             n -= tail;
1786         }
1787 
1788         if (ret & BDRV_BLOCK_ZERO) {
1789             s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
1790         } else if (ret & BDRV_BLOCK_DATA) {
1791             s->status = BLK_DATA;
1792         } else {
1793             s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
1794         }
1795 
1796         s->sector_next_status = sector_num + n;
1797     }
1798 
1799     n = MIN(n, s->sector_next_status - sector_num);
1800     if (s->status == BLK_DATA) {
1801         n = MIN(n, s->buf_sectors);
1802     }
1803 
1804     /* We need to write complete clusters for compressed images, so if an
1805      * unallocated area is shorter than that, we must consider the whole
1806      * cluster allocated. */
1807     if (s->compressed) {
1808         if (n < s->cluster_sectors) {
1809             n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
1810             s->status = BLK_DATA;
1811         } else {
1812             n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
1813         }
1814     }
1815 
1816     return n;
1817 }
1818 
1819 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1820                                         int nb_sectors, uint8_t *buf)
1821 {
1822     uint64_t single_read_until = 0;
1823     int n, ret;
1824 
1825     assert(nb_sectors <= s->buf_sectors);
1826     while (nb_sectors > 0) {
1827         BlockBackend *blk;
1828         int src_cur;
1829         int64_t bs_sectors, src_cur_offset;
1830         uint64_t offset;
1831 
1832         /* In the case of compression with multiple source files, we can get a
1833          * nb_sectors that spreads into the next part. So we must be able to
1834          * read across multiple BDSes for one convert_read() call. */
1835         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1836         blk = s->src[src_cur];
1837         bs_sectors = s->src_sectors[src_cur];
1838 
1839         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1840 
1841         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1842         if (single_read_until > offset) {
1843             n = 1;
1844         }
1845 
1846         ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1847         if (ret < 0) {
1848             if (s->salvage) {
1849                 if (n > 1) {
1850                     single_read_until = offset + (n << BDRV_SECTOR_BITS);
1851                     continue;
1852                 } else {
1853                     if (!s->quiet) {
1854                         warn_report("error while reading offset %" PRIu64
1855                                     ": %s", offset, strerror(-ret));
1856                     }
1857                     memset(buf, 0, BDRV_SECTOR_SIZE);
1858                 }
1859             } else {
1860                 return ret;
1861             }
1862         }
1863 
1864         sector_num += n;
1865         nb_sectors -= n;
1866         buf += n * BDRV_SECTOR_SIZE;
1867     }
1868 
1869     return 0;
1870 }
1871 
1872 
1873 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1874                                          int nb_sectors, uint8_t *buf,
1875                                          enum ImgConvertBlockStatus status)
1876 {
1877     int ret;
1878 
1879     while (nb_sectors > 0) {
1880         int n = nb_sectors;
1881         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1882 
1883         switch (status) {
1884         case BLK_BACKING_FILE:
1885             /* If we have a backing file, leave clusters unallocated that are
1886              * unallocated in the source image, so that the backing file is
1887              * visible at the respective offset. */
1888             assert(s->target_has_backing);
1889             break;
1890 
1891         case BLK_DATA:
1892             /* If we're told to keep the target fully allocated (-S 0) or there
1893              * is real non-zero data, we must write it. Otherwise we can treat
1894              * it as zero sectors.
1895              * Compressed clusters need to be written as a whole, so in that
1896              * case we can only save the write if the buffer is completely
1897              * zeroed. */
1898             if (!s->min_sparse ||
1899                 (!s->compressed &&
1900                  is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1901                                           sector_num, s->alignment)) ||
1902                 (s->compressed &&
1903                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1904             {
1905                 ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1906                                     n << BDRV_SECTOR_BITS, buf, flags);
1907                 if (ret < 0) {
1908                     return ret;
1909                 }
1910                 break;
1911             }
1912             /* fall-through */
1913 
1914         case BLK_ZERO:
1915             if (s->has_zero_init) {
1916                 assert(!s->target_has_backing);
1917                 break;
1918             }
1919             ret = blk_co_pwrite_zeroes(s->target,
1920                                        sector_num << BDRV_SECTOR_BITS,
1921                                        n << BDRV_SECTOR_BITS,
1922                                        BDRV_REQ_MAY_UNMAP);
1923             if (ret < 0) {
1924                 return ret;
1925             }
1926             break;
1927         }
1928 
1929         sector_num += n;
1930         nb_sectors -= n;
1931         buf += n * BDRV_SECTOR_SIZE;
1932     }
1933 
1934     return 0;
1935 }
1936 
1937 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1938                                               int nb_sectors)
1939 {
1940     int n, ret;
1941 
1942     while (nb_sectors > 0) {
1943         BlockBackend *blk;
1944         int src_cur;
1945         int64_t bs_sectors, src_cur_offset;
1946         int64_t offset;
1947 
1948         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1949         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1950         blk = s->src[src_cur];
1951         bs_sectors = s->src_sectors[src_cur];
1952 
1953         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1954 
1955         ret = blk_co_copy_range(blk, offset, s->target,
1956                                 sector_num << BDRV_SECTOR_BITS,
1957                                 n << BDRV_SECTOR_BITS, 0, 0);
1958         if (ret < 0) {
1959             return ret;
1960         }
1961 
1962         sector_num += n;
1963         nb_sectors -= n;
1964     }
1965     return 0;
1966 }
1967 
1968 static void coroutine_fn convert_co_do_copy(void *opaque)
1969 {
1970     ImgConvertState *s = opaque;
1971     uint8_t *buf = NULL;
1972     int ret, i;
1973     int index = -1;
1974 
1975     for (i = 0; i < s->num_coroutines; i++) {
1976         if (s->co[i] == qemu_coroutine_self()) {
1977             index = i;
1978             break;
1979         }
1980     }
1981     assert(index >= 0);
1982 
1983     s->running_coroutines++;
1984     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1985 
1986     while (1) {
1987         int n;
1988         int64_t sector_num;
1989         enum ImgConvertBlockStatus status;
1990         bool copy_range;
1991 
1992         qemu_co_mutex_lock(&s->lock);
1993         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1994             qemu_co_mutex_unlock(&s->lock);
1995             break;
1996         }
1997         WITH_GRAPH_RDLOCK_GUARD() {
1998             n = convert_iteration_sectors(s, s->sector_num);
1999         }
2000         if (n < 0) {
2001             qemu_co_mutex_unlock(&s->lock);
2002             s->ret = n;
2003             break;
2004         }
2005         /* save current sector and allocation status to local variables */
2006         sector_num = s->sector_num;
2007         status = s->status;
2008         if (!s->min_sparse && s->status == BLK_ZERO) {
2009             n = MIN(n, s->buf_sectors);
2010         }
2011         /* increment global sector counter so that other coroutines can
2012          * already continue reading beyond this request */
2013         s->sector_num += n;
2014         qemu_co_mutex_unlock(&s->lock);
2015 
2016         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
2017             s->allocated_done += n;
2018             qemu_progress_print(100.0 * s->allocated_done /
2019                                         s->allocated_sectors, 0);
2020         }
2021 
2022 retry:
2023         copy_range = s->copy_range && s->status == BLK_DATA;
2024         if (status == BLK_DATA && !copy_range) {
2025             ret = convert_co_read(s, sector_num, n, buf);
2026             if (ret < 0) {
2027                 error_report("error while reading at byte %lld: %s",
2028                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2029                 s->ret = ret;
2030             }
2031         } else if (!s->min_sparse && status == BLK_ZERO) {
2032             status = BLK_DATA;
2033             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
2034         }
2035 
2036         if (s->wr_in_order) {
2037             /* keep writes in order */
2038             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
2039                 s->wait_sector_num[index] = sector_num;
2040                 qemu_coroutine_yield();
2041             }
2042             s->wait_sector_num[index] = -1;
2043         }
2044 
2045         if (s->ret == -EINPROGRESS) {
2046             if (copy_range) {
2047                 WITH_GRAPH_RDLOCK_GUARD() {
2048                     ret = convert_co_copy_range(s, sector_num, n);
2049                 }
2050                 if (ret) {
2051                     s->copy_range = false;
2052                     goto retry;
2053                 }
2054             } else {
2055                 ret = convert_co_write(s, sector_num, n, buf, status);
2056             }
2057             if (ret < 0) {
2058                 error_report("error while writing at byte %lld: %s",
2059                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2060                 s->ret = ret;
2061             }
2062         }
2063 
2064         if (s->wr_in_order) {
2065             /* reenter the coroutine that might have waited
2066              * for this write to complete */
2067             s->wr_offs = sector_num + n;
2068             for (i = 0; i < s->num_coroutines; i++) {
2069                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
2070                     /*
2071                      * A -> B -> A cannot occur because A has
2072                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
2073                      * B will never enter A during this time window.
2074                      */
2075                     qemu_coroutine_enter(s->co[i]);
2076                     break;
2077                 }
2078             }
2079         }
2080     }
2081 
2082     qemu_vfree(buf);
2083     s->co[index] = NULL;
2084     s->running_coroutines--;
2085     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
2086         /* the convert job finished successfully */
2087         s->ret = 0;
2088     }
2089 }
2090 
2091 static int convert_do_copy(ImgConvertState *s)
2092 {
2093     int ret, i, n;
2094     int64_t sector_num = 0;
2095 
2096     /* Check whether we have zero initialisation or can get it efficiently */
2097     if (!s->has_zero_init && s->target_is_new && s->min_sparse &&
2098         !s->target_has_backing) {
2099         bdrv_graph_rdlock_main_loop();
2100         s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
2101         bdrv_graph_rdunlock_main_loop();
2102     }
2103 
2104     /* Allocate buffer for copied data. For compressed images, only one cluster
2105      * can be copied at a time. */
2106     if (s->compressed) {
2107         if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
2108             error_report("invalid cluster size");
2109             return -EINVAL;
2110         }
2111         s->buf_sectors = s->cluster_sectors;
2112     }
2113 
2114     while (sector_num < s->total_sectors) {
2115         bdrv_graph_rdlock_main_loop();
2116         n = convert_iteration_sectors(s, sector_num);
2117         bdrv_graph_rdunlock_main_loop();
2118         if (n < 0) {
2119             return n;
2120         }
2121         if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO))
2122         {
2123             s->allocated_sectors += n;
2124         }
2125         sector_num += n;
2126     }
2127 
2128     /* Do the copy */
2129     s->sector_next_status = 0;
2130     s->ret = -EINPROGRESS;
2131 
2132     qemu_co_mutex_init(&s->lock);
2133     for (i = 0; i < s->num_coroutines; i++) {
2134         s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
2135         s->wait_sector_num[i] = -1;
2136         qemu_coroutine_enter(s->co[i]);
2137     }
2138 
2139     while (s->running_coroutines) {
2140         main_loop_wait(false);
2141     }
2142 
2143     if (s->compressed && !s->ret) {
2144         /* signal EOF to align */
2145         ret = blk_pwrite_compressed(s->target, 0, 0, NULL);
2146         if (ret < 0) {
2147             return ret;
2148         }
2149     }
2150 
2151     return s->ret;
2152 }
2153 
2154 /* Check that bitmaps can be copied, or output an error */
2155 static int convert_check_bitmaps(BlockDriverState *src, bool skip_broken)
2156 {
2157     BdrvDirtyBitmap *bm;
2158 
2159     if (!bdrv_supports_persistent_dirty_bitmap(src)) {
2160         error_report("Source lacks bitmap support");
2161         return -1;
2162     }
2163     FOR_EACH_DIRTY_BITMAP(src, bm) {
2164         if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2165             continue;
2166         }
2167         if (!skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2168             error_report("Cannot copy inconsistent bitmap '%s'",
2169                          bdrv_dirty_bitmap_name(bm));
2170             error_printf("Try --skip-broken-bitmaps, or "
2171                          "use 'qemu-img bitmap --remove' to delete it\n");
2172             return -1;
2173         }
2174     }
2175     return 0;
2176 }
2177 
2178 static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst,
2179                                 bool skip_broken)
2180 {
2181     BdrvDirtyBitmap *bm;
2182     Error *err = NULL;
2183 
2184     FOR_EACH_DIRTY_BITMAP(src, bm) {
2185         const char *name;
2186 
2187         if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2188             continue;
2189         }
2190         name = bdrv_dirty_bitmap_name(bm);
2191         if (skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2192             warn_report("Skipping inconsistent bitmap '%s'", name);
2193             continue;
2194         }
2195         qmp_block_dirty_bitmap_add(dst->node_name, name,
2196                                    true, bdrv_dirty_bitmap_granularity(bm),
2197                                    true, true,
2198                                    true, !bdrv_dirty_bitmap_enabled(bm),
2199                                    &err);
2200         if (err) {
2201             error_reportf_err(err, "Failed to create bitmap %s: ", name);
2202             return -1;
2203         }
2204 
2205         do_dirty_bitmap_merge(dst->node_name, name, src->node_name, name,
2206                               &err);
2207         if (err) {
2208             error_reportf_err(err, "Failed to populate bitmap %s: ", name);
2209             qmp_block_dirty_bitmap_remove(dst->node_name, name, NULL);
2210             return -1;
2211         }
2212     }
2213 
2214     return 0;
2215 }
2216 
2217 #define MAX_BUF_SECTORS 32768
2218 
2219 static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
2220 {
2221     ThrottleConfig cfg;
2222 
2223     throttle_config_init(&cfg);
2224     cfg.buckets[THROTTLE_BPS_WRITE].avg = rate_limit;
2225 
2226     blk_io_limits_enable(blk, CONVERT_THROTTLE_GROUP);
2227     blk_set_io_limits(blk, &cfg);
2228 }
2229 
2230 static int img_convert(const img_cmd_t *ccmd, int argc, char **argv)
2231 {
2232     int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
2233     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2234                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2235                *out_filename, *out_baseimg_param, *snapshot_name = NULL,
2236                *backing_fmt = NULL;
2237     BlockDriver *drv = NULL, *proto_drv = NULL;
2238     BlockDriverInfo bdi;
2239     BlockDriverState *out_bs;
2240     QemuOpts *opts = NULL, *sn_opts = NULL;
2241     QemuOptsList *create_opts = NULL;
2242     QDict *open_opts = NULL;
2243     char *options = NULL;
2244     Error *local_err = NULL;
2245     bool writethrough, src_writethrough, image_opts = false,
2246          skip_create = false, progress = false, tgt_image_opts = false;
2247     int64_t ret = -EINVAL;
2248     bool force_share = false;
2249     bool explict_min_sparse = false;
2250     bool bitmaps = false;
2251     bool skip_broken = false;
2252     int64_t rate_limit = 0;
2253 
2254     ImgConvertState s = (ImgConvertState) {
2255         /* Need at least 4k of zeros for sparse detection */
2256         .min_sparse         = 8,
2257         .copy_range         = false,
2258         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2259         .wr_in_order        = true,
2260         .num_coroutines     = 8,
2261     };
2262 
2263     for(;;) {
2264         static const struct option long_options[] = {
2265             {"help", no_argument, 0, 'h'},
2266             {"source-format", required_argument, 0, 'f'},
2267             /*
2268              * XXX: historic --image-opts acts on source file only,
2269              * it seems better to have it affect both source and target,
2270              * and have separate --source-image-opts for source,
2271              * but this might break existing setups.
2272              */
2273             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2274             {"source-cache", required_argument, 0, 'T'},
2275             {"snapshot", required_argument, 0, 'l'},
2276             {"bitmaps", no_argument, 0, OPTION_BITMAPS},
2277             {"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN},
2278             {"salvage", no_argument, 0, OPTION_SALVAGE},
2279             {"target-format", required_argument, 0, 'O'},
2280             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2281             {"target-format-options", required_argument, 0, 'o'},
2282             {"target-cache", required_argument, 0, 't'},
2283             {"backing", required_argument, 0, 'b'},
2284             {"backing-format", required_argument, 0, 'F'},
2285             {"sparse-size", required_argument, 0, 'S'},
2286             {"no-create", no_argument, 0, 'n'},
2287             {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2288             {"force-share", no_argument, 0, 'U'},
2289             {"rate-limit", required_argument, 0, 'r'},
2290             {"parallel", required_argument, 0, 'm'},
2291             {"oob-writes", no_argument, 0, 'W'},
2292             {"copy-range-offloading", no_argument, 0, 'C'},
2293             {"progress", no_argument, 0, 'p'},
2294             {"quiet", no_argument, 0, 'q'},
2295             {"object", required_argument, 0, OPTION_OBJECT},
2296             {0, 0, 0, 0}
2297         };
2298         c = getopt_long(argc, argv, "hf:O:b:B:CcF:o:l:S:pt:T:nm:WUr:q",
2299                         long_options, NULL);
2300         if (c == -1) {
2301             break;
2302         }
2303         switch (c) {
2304         case 'h':
2305             cmd_help(ccmd, "[-f SRC_FMT | --image-opts] [-T SRC_CACHE]\n"
2306 "        [-l SNAPSHOT] [--bitmaps [--skip-broken-bitmaps]] [--salvage]\n"
2307 "        [-O TGT_FMT | --target-image-opts] [-o TGT_FMT_OPTS] [-t TGT_CACHE]\n"
2308 "        [-b BACKING_FILE [-F BACKING_FMT]] [-S SPARSE_SIZE]\n"
2309 "        [-n] [--target-is-zero] [-c]\n"
2310 "        [-U] [-r RATE] [-m NUM_PARALLEL] [-W] [-C] [-p] [-q] [--object OBJDEF]\n"
2311 "        SRC_FILE [SRC_FILE2...] TGT_FILE\n"
2312 ,
2313 "  -f, --source-format SRC_FMT\n"
2314 "     specify format of all SRC_FILEs explicitly (default: probing is used)\n"
2315 "  --image-opts\n"
2316 "     treat each SRC_FILE as an option string (key=value,...), not a file name\n"
2317 "     (incompatible with -f|--source-format)\n"
2318 "  -T, --source-cache SRC_CACHE\n"
2319 "     source image(s) cache mode (" BDRV_DEFAULT_CACHE ")\n"
2320 "  -l, --snapshot SNAPSHOT\n"
2321 "     specify source snapshot\n"
2322 "  --bitmaps\n"
2323 "     also copy any persistent bitmaps present in source\n"
2324 "  --skip-broken-bitmaps\n"
2325 "     skip (do not error out) any broken bitmaps\n"
2326 "  --salvage\n"
2327 "     ignore errors on input (convert unreadable areas to zeros)\n"
2328 "  -O, --target-format TGT_FMT\n"
2329 "     specify TGT_FILE image format (default: raw)\n"
2330 "  --target-image-opts\n"
2331 "     treat TGT_FILE as an option string (key=value,...), not a file name\n"
2332 "     (incompatible with -O|--target-format)\n"
2333 "  -o, --target-format-options TGT_FMT_OPTS\n"
2334 "     TGT_FMT-specific options\n"
2335 "  -t, --target-cache TGT_CACHE\n"
2336 "     cache mode when opening output image (default: unsafe)\n"
2337 "  -b, --backing BACKING_FILE (was -B in <= 10.0)\n"
2338 "     create target image to be a CoW on top of BACKING_FILE\n"
2339 "  -F, --backing-format BACKING_FMT\n" /* -B used for -b in <=10.0 */
2340 "     specify BACKING_FILE image format explicitly (default: probing is used)\n"
2341 "  -S, --sparse-size SPARSE_SIZE[bkKMGTPE]\n"
2342 "     specify number of consecutive zero bytes to treat as a gap on output\n"
2343 "     (rounded down to nearest 512 bytes), with optional multiplier suffix\n"
2344 "  -n, --no-create\n"
2345 "     omit target volume creation (e.g. on rbd)\n"
2346 "  --target-is-zero\n"
2347 "     indicates that the target volume is pre-zeroed\n"
2348 "  -c, --compress\n"
2349 "     create compressed output image (qcow and qcow2 formats only)\n"
2350 "  -U, --force-share\n"
2351 "     open images in shared mode for concurrent access\n"
2352 "  -r, --rate-limit RATE\n"
2353 "     I/O rate limit, in bytes per second\n"
2354 "  -m, --parallel NUM_PARALLEL\n"
2355 "     specify parallelism (default: 8)\n"
2356 "  -C, --copy-range-offloading\n"
2357 "     try to use copy offloading\n"
2358 "  -W, --oob-writes\n"
2359 "     enable out-of-order writes to improve performance\n"
2360 "  -p, --progress\n"
2361 "     display progress information\n"
2362 "  -q, --quiet\n"
2363 "     quiet mode (produce only error messages if any)\n"
2364 "  --object OBJDEF\n"
2365 "     defines QEMU user-creatable object\n"
2366 "  SRC_FILE...\n"
2367 "     one or more source image file names,\n"
2368 "     or option strings (key=value,..) with --source-image-opts\n"
2369 "  TGT_FILE\n"
2370 "     target (output) image file name,\n"
2371 "     or option string (key=value,..) with --target-image-opts\n"
2372 );
2373             break;
2374         case 'f':
2375             fmt = optarg;
2376             break;
2377         case OPTION_IMAGE_OPTS:
2378             image_opts = true;
2379             break;
2380         case 'T':
2381             src_cache = optarg;
2382             break;
2383         case 'l':
2384             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2385                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2386                                                   optarg, false);
2387                 if (!sn_opts) {
2388                     error_report("Failed in parsing snapshot param '%s'",
2389                                  optarg);
2390                     goto fail_getopt;
2391                 }
2392             } else {
2393                 snapshot_name = optarg;
2394             }
2395             break;
2396         case OPTION_BITMAPS:
2397             bitmaps = true;
2398             break;
2399         case OPTION_SKIP_BROKEN:
2400             skip_broken = true;
2401             break;
2402         case OPTION_SALVAGE:
2403             s.salvage = true;
2404             break;
2405          case 'O':
2406             out_fmt = optarg;
2407             break;
2408         case OPTION_TARGET_IMAGE_OPTS:
2409             tgt_image_opts = true;
2410             break;
2411         case 'o':
2412             if (accumulate_options(&options, optarg) < 0) {
2413                 goto fail_getopt;
2414             }
2415             break;
2416         case 't':
2417             cache = optarg;
2418             break;
2419         case 'B': /* <=10.0 */
2420         case 'b':
2421             out_baseimg = optarg;
2422             break;
2423         case 'F': /* can't use -B as it used as -b in <=10.0 */
2424             backing_fmt = optarg;
2425             break;
2426         case 'S':
2427         {
2428             int64_t sval;
2429 
2430             sval = cvtnum("buffer size for sparse output", optarg, true);
2431             if (sval < 0) {
2432                 goto fail_getopt;
2433             } else if (!QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2434                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2435                 error_report("Invalid buffer size for sparse output specified. "
2436                     "Valid sizes are multiples of %llu up to %llu. Select "
2437                     "0 to disable sparse detection (fully allocates output).",
2438                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2439                 goto fail_getopt;
2440             }
2441 
2442             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2443             explict_min_sparse = true;
2444             break;
2445         }
2446         case 'n':
2447             skip_create = true;
2448             break;
2449         case OPTION_TARGET_IS_ZERO:
2450             /*
2451              * The user asserting that the target is blank has the
2452              * same effect as the target driver supporting zero
2453              * initialisation.
2454              */
2455             s.has_zero_init = true;
2456             break;
2457         case 'c':
2458             s.compressed = true;
2459             break;
2460         case 'U':
2461             force_share = true;
2462             break;
2463         case 'r':
2464             rate_limit = cvtnum("rate limit", optarg, true);
2465             if (rate_limit < 0) {
2466                 goto fail_getopt;
2467             }
2468             break;
2469         case 'm':
2470             s.num_coroutines = cvtnum_full("number of coroutines", optarg,
2471                                            false, 1, MAX_COROUTINES);
2472             if (s.num_coroutines < 0) {
2473                 goto fail_getopt;
2474             }
2475             break;
2476         case 'W':
2477             s.wr_in_order = false;
2478             break;
2479         case 'C':
2480             s.copy_range = true;
2481             break;
2482         case 'p':
2483             progress = true;
2484             break;
2485         case 'q':
2486             s.quiet = true;
2487             break;
2488         case OPTION_OBJECT:
2489             user_creatable_process_cmdline(optarg);
2490             break;
2491         default:
2492             tryhelp(argv[0]);
2493         }
2494     }
2495 
2496     if (!out_fmt && !tgt_image_opts) {
2497         out_fmt = "raw";
2498     }
2499 
2500     if (skip_broken && !bitmaps) {
2501         error_report("Use of --skip-broken-bitmaps requires --bitmaps");
2502         goto fail_getopt;
2503     }
2504 
2505     if (s.compressed && s.copy_range) {
2506         error_report("Cannot enable copy offloading when -c is used");
2507         goto fail_getopt;
2508     }
2509 
2510     if (explict_min_sparse && s.copy_range) {
2511         error_report("Cannot enable copy offloading when -S is used");
2512         goto fail_getopt;
2513     }
2514 
2515     if (s.copy_range && s.salvage) {
2516         error_report("Cannot use copy offloading in salvaging mode");
2517         goto fail_getopt;
2518     }
2519 
2520     if (tgt_image_opts && !skip_create) {
2521         error_report("--target-image-opts requires use of -n flag");
2522         goto fail_getopt;
2523     }
2524 
2525     if (skip_create && options) {
2526         error_report("-o has no effect when skipping image creation");
2527         goto fail_getopt;
2528     }
2529 
2530     if (s.has_zero_init && !skip_create) {
2531         error_report("--target-is-zero requires use of -n flag");
2532         goto fail_getopt;
2533     }
2534 
2535     s.src_num = argc - optind - 1;
2536     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2537 
2538     if (options && has_help_option(options)) {
2539         if (out_fmt) {
2540             ret = print_block_option_help(out_filename, out_fmt);
2541             goto fail_getopt;
2542         } else {
2543             error_report("Option help requires a format be specified");
2544             goto fail_getopt;
2545         }
2546     }
2547 
2548     if (s.src_num < 1) {
2549         error_report("Must specify image file name");
2550         goto fail_getopt;
2551     }
2552 
2553     /* ret is still -EINVAL until here */
2554     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2555     if (ret < 0) {
2556         error_report("Invalid source cache option: %s", src_cache);
2557         goto fail_getopt;
2558     }
2559 
2560     /* Initialize before goto out */
2561     if (s.quiet) {
2562         progress = false;
2563     }
2564     qemu_progress_init(progress, 1.0);
2565     qemu_progress_print(0, 100);
2566 
2567     s.src = g_new0(BlockBackend *, s.src_num);
2568     s.src_sectors = g_new(int64_t, s.src_num);
2569     s.src_alignment = g_new(int, s.src_num);
2570 
2571     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2572         BlockDriverState *src_bs;
2573         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2574                                fmt, src_flags, src_writethrough, s.quiet,
2575                                force_share);
2576         if (!s.src[bs_i]) {
2577             ret = -1;
2578             goto out;
2579         }
2580         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2581         if (s.src_sectors[bs_i] < 0) {
2582             error_report("Could not get size of %s: %s",
2583                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2584             ret = -1;
2585             goto out;
2586         }
2587         src_bs = blk_bs(s.src[bs_i]);
2588         s.src_alignment[bs_i] = DIV_ROUND_UP(src_bs->bl.request_alignment,
2589                                              BDRV_SECTOR_SIZE);
2590         if (!bdrv_get_info(src_bs, &bdi)) {
2591             s.src_alignment[bs_i] = MAX(s.src_alignment[bs_i],
2592                                         bdi.cluster_size / BDRV_SECTOR_SIZE);
2593         }
2594         s.total_sectors += s.src_sectors[bs_i];
2595     }
2596 
2597     if (sn_opts) {
2598         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2599                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2600                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2601                                &local_err);
2602     } else if (snapshot_name != NULL) {
2603         if (s.src_num > 1) {
2604             error_report("No support for concatenating multiple snapshot");
2605             ret = -1;
2606             goto out;
2607         }
2608 
2609         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2610                                              &local_err);
2611     }
2612     if (local_err) {
2613         error_reportf_err(local_err, "Failed to load snapshot: ");
2614         ret = -1;
2615         goto out;
2616     }
2617 
2618     if (!skip_create) {
2619         /* Find driver and parse its options */
2620         drv = bdrv_find_format(out_fmt);
2621         if (!drv) {
2622             error_report("Unknown file format '%s'", out_fmt);
2623             ret = -1;
2624             goto out;
2625         }
2626 
2627         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2628         if (!proto_drv) {
2629             error_report_err(local_err);
2630             ret = -1;
2631             goto out;
2632         }
2633 
2634         if (!drv->create_opts) {
2635             error_report("Format driver '%s' does not support image creation",
2636                          drv->format_name);
2637             ret = -1;
2638             goto out;
2639         }
2640 
2641         if (!proto_drv->create_opts) {
2642             error_report("Protocol driver '%s' does not support image creation",
2643                          proto_drv->format_name);
2644             ret = -1;
2645             goto out;
2646         }
2647 
2648         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2649         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2650 
2651         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2652         if (options) {
2653             if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
2654                 error_report_err(local_err);
2655                 ret = -1;
2656                 goto out;
2657             }
2658         }
2659 
2660         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
2661                             s.total_sectors * BDRV_SECTOR_SIZE, &error_abort);
2662         ret = add_old_style_options(out_fmt, opts, out_baseimg, backing_fmt);
2663         if (ret < 0) {
2664             goto out;
2665         }
2666     }
2667 
2668     /* Get backing file name if -o backing_file was used */
2669     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2670     if (out_baseimg_param) {
2671         out_baseimg = out_baseimg_param;
2672     }
2673     s.target_has_backing = (bool) out_baseimg;
2674 
2675     if (s.has_zero_init && s.target_has_backing) {
2676         error_report("Cannot use --target-is-zero when the destination "
2677                      "image has a backing file");
2678         goto out;
2679     }
2680 
2681     if (s.src_num > 1 && out_baseimg) {
2682         error_report("Having a backing file for the target makes no sense when "
2683                      "concatenating multiple input images");
2684         ret = -1;
2685         goto out;
2686     }
2687 
2688     if (out_baseimg_param) {
2689         if (!qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT)) {
2690             error_report("Use of backing file requires explicit "
2691                          "backing format");
2692             ret = -1;
2693             goto out;
2694         }
2695     }
2696 
2697     /* Check if compression is supported */
2698     if (s.compressed) {
2699         bool encryption =
2700             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2701         const char *encryptfmt =
2702             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2703         const char *preallocation =
2704             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2705 
2706         if (drv && !block_driver_can_compress(drv)) {
2707             error_report("Compression not supported for this file format");
2708             ret = -1;
2709             goto out;
2710         }
2711 
2712         if (encryption || encryptfmt) {
2713             error_report("Compression and encryption not supported at "
2714                          "the same time");
2715             ret = -1;
2716             goto out;
2717         }
2718 
2719         if (preallocation
2720             && strcmp(preallocation, "off"))
2721         {
2722             error_report("Compression and preallocation not supported at "
2723                          "the same time");
2724             ret = -1;
2725             goto out;
2726         }
2727     }
2728 
2729     /* Determine if bitmaps need copying */
2730     if (bitmaps) {
2731         if (s.src_num > 1) {
2732             error_report("Copying bitmaps only possible with single source");
2733             ret = -1;
2734             goto out;
2735         }
2736         ret = convert_check_bitmaps(blk_bs(s.src[0]), skip_broken);
2737         if (ret < 0) {
2738             goto out;
2739         }
2740     }
2741 
2742     /*
2743      * The later open call will need any decryption secrets, and
2744      * bdrv_create() will purge "opts", so extract them now before
2745      * they are lost.
2746      */
2747     if (!skip_create) {
2748         open_opts = qdict_new();
2749         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2750 
2751         /* Create the new image */
2752         ret = bdrv_create(drv, out_filename, opts, &local_err);
2753         if (ret < 0) {
2754             error_reportf_err(local_err, "%s: error while converting %s: ",
2755                               out_filename, out_fmt);
2756             goto out;
2757         }
2758     }
2759 
2760     s.target_is_new = !skip_create;
2761 
2762     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2763     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2764     if (ret < 0) {
2765         error_report("Invalid cache option: %s", cache);
2766         goto out;
2767     }
2768 
2769     if (flags & BDRV_O_NOCACHE) {
2770         /*
2771          * If we open the target with O_DIRECT, it may be necessary to
2772          * extend its size to align to the physical sector size.
2773          */
2774         flags |= BDRV_O_RESIZE;
2775     }
2776 
2777     if (skip_create) {
2778         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2779                             flags, writethrough, s.quiet, false);
2780     } else {
2781         /* TODO ultimately we should allow --target-image-opts
2782          * to be used even when -n is not given.
2783          * That has to wait for bdrv_create to be improved
2784          * to allow filenames in option syntax
2785          */
2786         s.target = img_open_file(out_filename, open_opts, out_fmt,
2787                                  flags, writethrough, s.quiet, false);
2788         open_opts = NULL; /* blk_new_open will have freed it */
2789     }
2790     if (!s.target) {
2791         ret = -1;
2792         goto out;
2793     }
2794     out_bs = blk_bs(s.target);
2795 
2796     if (bitmaps && !bdrv_supports_persistent_dirty_bitmap(out_bs)) {
2797         error_report("Format driver '%s' does not support bitmaps",
2798                      out_bs->drv->format_name);
2799         ret = -1;
2800         goto out;
2801     }
2802 
2803     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2804         error_report("Compression not supported for this file format");
2805         ret = -1;
2806         goto out;
2807     }
2808 
2809     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2810      * or discard_alignment of the out_bs is greater. Limit to
2811      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2812     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2813                         MAX(s.buf_sectors,
2814                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2815                                 out_bs->bl.pdiscard_alignment >>
2816                                 BDRV_SECTOR_BITS)));
2817 
2818     /* try to align the write requests to the destination to avoid unnecessary
2819      * RMW cycles. */
2820     s.alignment = MAX(pow2floor(s.min_sparse),
2821                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2822                                    BDRV_SECTOR_SIZE));
2823     assert(is_power_of_2(s.alignment));
2824 
2825     if (skip_create) {
2826         int64_t output_sectors = blk_nb_sectors(s.target);
2827         if (output_sectors < 0) {
2828             error_report("unable to get output image length: %s",
2829                          strerror(-output_sectors));
2830             ret = -1;
2831             goto out;
2832         } else if (output_sectors < s.total_sectors) {
2833             error_report("output file is smaller than input file");
2834             ret = -1;
2835             goto out;
2836         }
2837     }
2838 
2839     if (s.target_has_backing && s.target_is_new) {
2840         /* Errors are treated as "backing length unknown" (which means
2841          * s.target_backing_sectors has to be negative, which it will
2842          * be automatically).  The backing file length is used only
2843          * for optimizations, so such a case is not fatal. */
2844         bdrv_graph_rdlock_main_loop();
2845         s.target_backing_sectors =
2846             bdrv_nb_sectors(bdrv_backing_chain_next(out_bs));
2847         bdrv_graph_rdunlock_main_loop();
2848     } else {
2849         s.target_backing_sectors = -1;
2850     }
2851 
2852     ret = bdrv_get_info(out_bs, &bdi);
2853     if (ret < 0) {
2854         if (s.compressed) {
2855             error_report("could not get block driver info");
2856             goto out;
2857         }
2858     } else {
2859         s.compressed = s.compressed || bdi.needs_compressed_writes;
2860         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2861     }
2862 
2863     if (rate_limit) {
2864         set_rate_limit(s.target, rate_limit);
2865     }
2866 
2867     ret = convert_do_copy(&s);
2868 
2869     /* Now copy the bitmaps */
2870     if (bitmaps && ret == 0) {
2871         ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs, skip_broken);
2872     }
2873 
2874 out:
2875     if (!ret) {
2876         qemu_progress_print(100, 0);
2877     }
2878     qemu_progress_end();
2879     qemu_opts_del(opts);
2880     qemu_opts_free(create_opts);
2881     qobject_unref(open_opts);
2882     blk_unref(s.target);
2883     if (s.src) {
2884         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2885             blk_unref(s.src[bs_i]);
2886         }
2887         g_free(s.src);
2888     }
2889     g_free(s.src_sectors);
2890     g_free(s.src_alignment);
2891 fail_getopt:
2892     qemu_opts_del(sn_opts);
2893     g_free(options);
2894 
2895     return !!ret;
2896 }
2897 
2898 
2899 static void dump_snapshots(BlockDriverState *bs)
2900 {
2901     QEMUSnapshotInfo *sn_tab, *sn;
2902     int nb_sns, i;
2903 
2904     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
2905     if (nb_sns <= 0)
2906         return;
2907     printf("Snapshot list:\n");
2908     bdrv_snapshot_dump(NULL);
2909     printf("\n");
2910     for(i = 0; i < nb_sns; i++) {
2911         sn = &sn_tab[i];
2912         bdrv_snapshot_dump(sn);
2913         printf("\n");
2914     }
2915     g_free(sn_tab);
2916 }
2917 
2918 static void dump_json_block_graph_info_list(BlockGraphInfoList *list)
2919 {
2920     GString *str;
2921     QObject *obj;
2922     Visitor *v = qobject_output_visitor_new(&obj);
2923 
2924     visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort);
2925     visit_complete(v, &obj);
2926     str = qobject_to_json_pretty(obj, true);
2927     assert(str != NULL);
2928     printf("%s\n", str->str);
2929     qobject_unref(obj);
2930     visit_free(v);
2931     g_string_free(str, true);
2932 }
2933 
2934 static void dump_json_block_graph_info(BlockGraphInfo *info)
2935 {
2936     GString *str;
2937     QObject *obj;
2938     Visitor *v = qobject_output_visitor_new(&obj);
2939 
2940     visit_type_BlockGraphInfo(v, NULL, &info, &error_abort);
2941     visit_complete(v, &obj);
2942     str = qobject_to_json_pretty(obj, true);
2943     assert(str != NULL);
2944     printf("%s\n", str->str);
2945     qobject_unref(obj);
2946     visit_free(v);
2947     g_string_free(str, true);
2948 }
2949 
2950 static void dump_human_image_info(BlockGraphInfo *info, int indentation,
2951                                   const char *path)
2952 {
2953     BlockChildInfoList *children_list;
2954 
2955     bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation,
2956                         info->children == NULL);
2957 
2958     for (children_list = info->children; children_list;
2959          children_list = children_list->next)
2960     {
2961         BlockChildInfo *child = children_list->value;
2962         g_autofree char *child_path = NULL;
2963 
2964         printf("%*sChild node '%s%s':\n",
2965                indentation * 4, "", path, child->name);
2966         child_path = g_strdup_printf("%s%s/", path, child->name);
2967         dump_human_image_info(child->info, indentation + 1, child_path);
2968     }
2969 }
2970 
2971 static void dump_human_image_info_list(BlockGraphInfoList *list)
2972 {
2973     BlockGraphInfoList *elem;
2974     bool delim = false;
2975 
2976     for (elem = list; elem; elem = elem->next) {
2977         if (delim) {
2978             printf("\n");
2979         }
2980         delim = true;
2981 
2982         dump_human_image_info(elem->value, 0, "/");
2983     }
2984 }
2985 
2986 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2987 {
2988     return strcmp(a, b) == 0;
2989 }
2990 
2991 /**
2992  * Open an image file chain and return an BlockGraphInfoList
2993  *
2994  * @filename: topmost image filename
2995  * @fmt: topmost image format (may be NULL to autodetect)
2996  * @chain: true  - enumerate entire backing file chain
2997  *         false - only topmost image file
2998  *
2999  * Returns a list of BlockNodeInfo objects or NULL if there was an error
3000  * opening an image file.  If there was an error a message will have been
3001  * printed to stderr.
3002  */
3003 static BlockGraphInfoList *collect_image_info_list(bool image_opts,
3004                                                    const char *filename,
3005                                                    const char *fmt,
3006                                                    const char *cache,
3007                                                    bool chain, bool limits,
3008                                                    bool force_share)
3009 {
3010     BlockGraphInfoList *head = NULL;
3011     BlockGraphInfoList **tail = &head;
3012     GHashTable *filenames;
3013     Error *err = NULL;
3014     int cache_flags = 0;
3015     bool writethrough = false;
3016     int ret;
3017 
3018     ret = bdrv_parse_cache_mode(cache, &cache_flags, &writethrough);
3019     if (ret < 0) {
3020         error_report("Invalid cache option: %s", cache);
3021         return NULL;
3022     }
3023 
3024     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
3025 
3026     while (filename) {
3027         BlockBackend *blk;
3028         BlockDriverState *bs;
3029         BlockGraphInfo *info;
3030 
3031         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
3032             error_report("Backing file '%s' creates an infinite loop.",
3033                          filename);
3034             goto err;
3035         }
3036         g_hash_table_insert(filenames, (gpointer)filename, NULL);
3037 
3038         blk = img_open(image_opts, filename, fmt,
3039                        BDRV_O_NO_BACKING | BDRV_O_NO_IO | cache_flags,
3040                        writethrough, false, force_share);
3041         if (!blk) {
3042             goto err;
3043         }
3044         bs = blk_bs(blk);
3045 
3046         /*
3047          * Note that the returned BlockGraphInfo object will not have
3048          * information about this image's backing node, because we have opened
3049          * it with BDRV_O_NO_BACKING.  Printing this object will therefore not
3050          * duplicate the backing chain information that we obtain by walking
3051          * the chain manually here.
3052          */
3053         bdrv_graph_rdlock_main_loop();
3054         bdrv_query_block_graph_info(bs, &info, limits, &err);
3055         bdrv_graph_rdunlock_main_loop();
3056 
3057         if (err) {
3058             error_report_err(err);
3059             blk_unref(blk);
3060             goto err;
3061         }
3062 
3063         QAPI_LIST_APPEND(tail, info);
3064 
3065         blk_unref(blk);
3066 
3067         /* Clear parameters that only apply to the topmost image */
3068         filename = fmt = NULL;
3069         image_opts = false;
3070 
3071         if (chain) {
3072             if (info->full_backing_filename) {
3073                 filename = info->full_backing_filename;
3074             } else if (info->backing_filename) {
3075                 error_report("Could not determine absolute backing filename,"
3076                              " but backing filename '%s' present",
3077                              info->backing_filename);
3078                 goto err;
3079             }
3080             if (info->backing_filename_format) {
3081                 fmt = info->backing_filename_format;
3082             }
3083         }
3084     }
3085     g_hash_table_destroy(filenames);
3086     return head;
3087 
3088 err:
3089     qapi_free_BlockGraphInfoList(head);
3090     g_hash_table_destroy(filenames);
3091     return NULL;
3092 }
3093 
3094 static int img_info(const img_cmd_t *ccmd, int argc, char **argv)
3095 {
3096     int c;
3097     OutputFormat output_format = OFORMAT_HUMAN;
3098     bool chain = false;
3099     const char *filename, *fmt;
3100     const char *cache = BDRV_DEFAULT_CACHE;
3101     BlockGraphInfoList *list;
3102     bool image_opts = false;
3103     bool force_share = false;
3104     bool limits = false;
3105 
3106     fmt = NULL;
3107     for(;;) {
3108         static const struct option long_options[] = {
3109             {"help", no_argument, 0, 'h'},
3110             {"format", required_argument, 0, 'f'},
3111             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3112             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
3113             {"cache", required_argument, 0, 't'},
3114             {"force-share", no_argument, 0, 'U'},
3115             {"limits", no_argument, 0, OPTION_LIMITS},
3116             {"output", required_argument, 0, OPTION_OUTPUT},
3117             {"object", required_argument, 0, OPTION_OBJECT},
3118             {0, 0, 0, 0}
3119         };
3120         c = getopt_long(argc, argv, "hf:t:U", long_options, NULL);
3121         if (c == -1) {
3122             break;
3123         }
3124         switch(c) {
3125         case 'h':
3126             cmd_help(ccmd, "[-f FMT | --image-opts] [--backing-chain] [-U]\n"
3127 "        [--output human|json] [--object OBJDEF] FILE\n"
3128 ,
3129 "  -f, --format FMT\n"
3130 "     specify FILE image format explicitly (default: probing is used)\n"
3131 "  --image-opts\n"
3132 "     treat FILE as an option string (key=value,..), not a file name\n"
3133 "     (incompatible with -f|--format)\n"
3134 "  --backing-chain\n"
3135 "     display information about the backing chain for copy-on-write overlays\n"
3136 "  -t, --cache CACHE\n"
3137 "     cache mode for FILE (default: " BDRV_DEFAULT_CACHE ")\n"
3138 "  -U, --force-share\n"
3139 "     open image in shared mode for concurrent access\n"
3140 "  --limits\n"
3141 "     show detected block limits (may depend on options, e.g. cache mode)\n"
3142 "  --output human|json\n"
3143 "     specify output format (default: human)\n"
3144 "  --object OBJDEF\n"
3145 "     defines QEMU user-creatable object\n"
3146 "  FILE\n"
3147 "     name of the image file, or option string (key=value,..)\n"
3148 "     with --image-opts, to operate on\n"
3149 );
3150             break;
3151         case 'f':
3152             fmt = optarg;
3153             break;
3154         case OPTION_IMAGE_OPTS:
3155             image_opts = true;
3156             break;
3157         case OPTION_BACKING_CHAIN:
3158             chain = true;
3159             break;
3160         case 't':
3161             cache = optarg;
3162             break;
3163         case 'U':
3164             force_share = true;
3165             break;
3166         case OPTION_LIMITS:
3167             limits = true;
3168             break;
3169         case OPTION_OUTPUT:
3170             output_format = parse_output_format(argv[0], optarg);
3171             break;
3172         case OPTION_OBJECT:
3173             user_creatable_process_cmdline(optarg);
3174             break;
3175         default:
3176             tryhelp(argv[0]);
3177         }
3178     }
3179     if (optind != argc - 1) {
3180         error_exit(argv[0], "Expecting one image file name");
3181     }
3182     filename = argv[optind++];
3183 
3184     list = collect_image_info_list(image_opts, filename, fmt, cache, chain,
3185                                    limits, force_share);
3186     if (!list) {
3187         return 1;
3188     }
3189 
3190     switch (output_format) {
3191     case OFORMAT_HUMAN:
3192         dump_human_image_info_list(list);
3193         break;
3194     case OFORMAT_JSON:
3195         if (chain) {
3196             dump_json_block_graph_info_list(list);
3197         } else {
3198             dump_json_block_graph_info(list->value);
3199         }
3200         break;
3201     }
3202 
3203     qapi_free_BlockGraphInfoList(list);
3204     return 0;
3205 }
3206 
3207 static int dump_map_entry(OutputFormat output_format, MapEntry *e,
3208                           MapEntry *next)
3209 {
3210     switch (output_format) {
3211     case OFORMAT_HUMAN:
3212         if (e->data && !e->has_offset) {
3213             error_report("File contains external, encrypted or compressed clusters.");
3214             return -1;
3215         }
3216         if (e->data && !e->zero) {
3217             printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
3218                    e->start, e->length,
3219                    e->has_offset ? e->offset : 0,
3220                    e->filename ?: "");
3221         }
3222         /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
3223          * Modify the flags here to allow more coalescing.
3224          */
3225         if (next && (!next->data || next->zero)) {
3226             next->data = false;
3227             next->zero = true;
3228         }
3229         break;
3230     case OFORMAT_JSON:
3231         printf("{ \"start\": %"PRId64", \"length\": %"PRId64","
3232                " \"depth\": %"PRId64", \"present\": %s, \"zero\": %s,"
3233                " \"data\": %s, \"compressed\": %s",
3234                e->start, e->length, e->depth,
3235                e->present ? "true" : "false",
3236                e->zero ? "true" : "false",
3237                e->data ? "true" : "false",
3238                e->compressed ? "true" : "false");
3239         if (e->has_offset) {
3240             printf(", \"offset\": %"PRId64"", e->offset);
3241         }
3242         putchar('}');
3243 
3244         if (next) {
3245             puts(",");
3246         }
3247         break;
3248     }
3249     return 0;
3250 }
3251 
3252 static int get_block_status(BlockDriverState *bs, int64_t offset,
3253                             int64_t bytes, MapEntry *e)
3254 {
3255     int ret;
3256     int depth;
3257     BlockDriverState *file;
3258     bool has_offset;
3259     int64_t map;
3260     char *filename = NULL;
3261 
3262     GLOBAL_STATE_CODE();
3263     GRAPH_RDLOCK_GUARD_MAINLOOP();
3264 
3265     /* As an optimization, we could cache the current range of unallocated
3266      * clusters in each file of the chain, and avoid querying the same
3267      * range repeatedly.
3268      */
3269 
3270     depth = 0;
3271     for (;;) {
3272         bs = bdrv_skip_filters(bs);
3273         ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
3274         if (ret < 0) {
3275             return ret;
3276         }
3277         assert(bytes);
3278         if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
3279             break;
3280         }
3281         bs = bdrv_cow_bs(bs);
3282         if (bs == NULL) {
3283             ret = 0;
3284             break;
3285         }
3286 
3287         depth++;
3288     }
3289 
3290     has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID);
3291 
3292     if (file && has_offset) {
3293         bdrv_refresh_filename(file);
3294         filename = file->filename;
3295     }
3296 
3297     *e = (MapEntry) {
3298         .start = offset,
3299         .length = bytes,
3300         .data = !!(ret & BDRV_BLOCK_DATA),
3301         .zero = !!(ret & BDRV_BLOCK_ZERO),
3302         .compressed = !!(ret & BDRV_BLOCK_COMPRESSED),
3303         .offset = map,
3304         .has_offset = has_offset,
3305         .depth = depth,
3306         .present = !!(ret & BDRV_BLOCK_ALLOCATED),
3307         .filename = filename,
3308     };
3309 
3310     return 0;
3311 }
3312 
3313 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
3314 {
3315     if (curr->length == 0) {
3316         return false;
3317     }
3318     if (curr->zero != next->zero ||
3319         curr->data != next->data ||
3320         curr->compressed != next->compressed ||
3321         curr->depth != next->depth ||
3322         curr->present != next->present ||
3323         !curr->filename != !next->filename ||
3324         curr->has_offset != next->has_offset) {
3325         return false;
3326     }
3327     if (curr->filename && strcmp(curr->filename, next->filename)) {
3328         return false;
3329     }
3330     if (curr->has_offset && curr->offset + curr->length != next->offset) {
3331         return false;
3332     }
3333     return true;
3334 }
3335 
3336 static int img_map(const img_cmd_t *ccmd, int argc, char **argv)
3337 {
3338     int c;
3339     OutputFormat output_format = OFORMAT_HUMAN;
3340     BlockBackend *blk;
3341     BlockDriverState *bs;
3342     const char *filename, *fmt;
3343     int64_t length;
3344     MapEntry curr = { .length = 0 }, next;
3345     int ret = 0;
3346     bool image_opts = false;
3347     bool force_share = false;
3348     int64_t start_offset = 0;
3349     int64_t max_length = -1;
3350 
3351     fmt = NULL;
3352     for (;;) {
3353         static const struct option long_options[] = {
3354             {"help", no_argument, 0, 'h'},
3355             {"format", required_argument, 0, 'f'},
3356             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3357             {"start-offset", required_argument, 0, 's'},
3358             {"max-length", required_argument, 0, 'l'},
3359             {"force-share", no_argument, 0, 'U'},
3360             {"output", required_argument, 0, OPTION_OUTPUT},
3361             {"object", required_argument, 0, OPTION_OBJECT},
3362             {0, 0, 0, 0}
3363         };
3364         c = getopt_long(argc, argv, "hf:s:l:U",
3365                         long_options, NULL);
3366         if (c == -1) {
3367             break;
3368         }
3369         switch (c) {
3370         case 'h':
3371             cmd_help(ccmd, "[-f FMT | --image-opts]\n"
3372 "        [--start-offset OFFSET] [--max-length LENGTH]\n"
3373 "        [--output human|json] [-U] [--object OBJDEF] FILE\n"
3374 ,
3375 "  -f, --format FMT\n"
3376 "     specify FILE image format explicitly (default: probing is used)\n"
3377 "  --image-opts\n"
3378 "     treat FILE as an option string (key=value,..), not a file name\n"
3379 "     (incompatible with -f|--format)\n"
3380 "  -s, --start-offset OFFSET\n"
3381 "     start at the given OFFSET in the image, not at the beginning\n"
3382 "  -l, --max-length LENGTH\n"
3383 "     process at most LENGTH bytes instead of up to the end of the image\n"
3384 "  --output human|json\n"
3385 "     specify output format name (default: human)\n"
3386 "  -U, --force-share\n"
3387 "     open image in shared mode for concurrent access\n"
3388 "  --object OBJDEF\n"
3389 "     defines QEMU user-creatable object\n"
3390 "  FILE\n"
3391 "     the image file name, or option string (key=value,..)\n"
3392 "     with --image-opts, to operate on\n"
3393 );
3394             break;
3395         case 'f':
3396             fmt = optarg;
3397             break;
3398         case OPTION_IMAGE_OPTS:
3399             image_opts = true;
3400             break;
3401         case 's':
3402             start_offset = cvtnum("start offset", optarg, true);
3403             if (start_offset < 0) {
3404                 return 1;
3405             }
3406             break;
3407         case 'l':
3408             max_length = cvtnum("max length", optarg, true);
3409             if (max_length < 0) {
3410                 return 1;
3411             }
3412             break;
3413         case OPTION_OUTPUT:
3414             output_format = parse_output_format(argv[0], optarg);
3415             break;
3416         case 'U':
3417             force_share = true;
3418             break;
3419         case OPTION_OBJECT:
3420             user_creatable_process_cmdline(optarg);
3421             break;
3422         default:
3423             tryhelp(argv[0]);
3424         }
3425     }
3426     if (optind != argc - 1) {
3427         error_exit(argv[0], "Expecting one image file name");
3428     }
3429     filename = argv[optind];
3430 
3431     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3432     if (!blk) {
3433         return 1;
3434     }
3435     bs = blk_bs(blk);
3436 
3437     if (output_format == OFORMAT_HUMAN) {
3438         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3439     } else if (output_format == OFORMAT_JSON) {
3440         putchar('[');
3441     }
3442 
3443     length = blk_getlength(blk);
3444     if (length < 0) {
3445         error_report("Failed to get size for '%s'", filename);
3446         return 1;
3447     }
3448     if (max_length != -1) {
3449         length = MIN(start_offset + max_length, length);
3450     }
3451 
3452     curr.start = start_offset;
3453     while (curr.start + curr.length < length) {
3454         int64_t offset = curr.start + curr.length;
3455         int64_t n = length - offset;
3456 
3457         ret = get_block_status(bs, offset, n, &next);
3458         if (ret < 0) {
3459             error_report("Could not read file metadata: %s", strerror(-ret));
3460             goto out;
3461         }
3462 
3463         if (entry_mergeable(&curr, &next)) {
3464             curr.length += next.length;
3465             continue;
3466         }
3467 
3468         if (curr.length > 0) {
3469             ret = dump_map_entry(output_format, &curr, &next);
3470             if (ret < 0) {
3471                 goto out;
3472             }
3473         }
3474         curr = next;
3475     }
3476 
3477     ret = dump_map_entry(output_format, &curr, NULL);
3478     if (output_format == OFORMAT_JSON) {
3479         puts("]");
3480     }
3481 
3482 out:
3483     blk_unref(blk);
3484     return ret < 0;
3485 }
3486 
/* Snapshot actions; each value equals its getopt short option character */
3488 #define SNAPSHOT_LIST   'l'
3489 #define SNAPSHOT_CREATE 'c'
3490 #define SNAPSHOT_APPLY  'a'
3491 #define SNAPSHOT_DELETE 'd'
3492 
3493 static int img_snapshot(const img_cmd_t *ccmd, int argc, char **argv)
3494 {
3495     BlockBackend *blk;
3496     BlockDriverState *bs;
3497     QEMUSnapshotInfo sn;
3498     char *filename, *fmt = NULL, *snapshot_name = NULL;
3499     int c, ret = 0;
3500     int action = 0;
3501     bool quiet = false;
3502     Error *err = NULL;
3503     bool image_opts = false;
3504     bool force_share = false;
3505     int64_t rt;
3506 
3507     /* Parse commandline parameters */
3508     for(;;) {
3509         static const struct option long_options[] = {
3510             {"help", no_argument, 0, 'h'},
3511             {"format", required_argument, 0, 'f'},
3512             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3513             {"list", no_argument, 0, SNAPSHOT_LIST},
3514             {"apply", required_argument, 0, SNAPSHOT_APPLY},
3515             {"create", required_argument, 0, SNAPSHOT_CREATE},
3516             {"delete", required_argument, 0, SNAPSHOT_DELETE},
3517             {"force-share", no_argument, 0, 'U'},
3518             {"quiet", no_argument, 0, 'q'},
3519             {"object", required_argument, 0, OPTION_OBJECT},
3520             {0, 0, 0, 0}
3521         };
3522         c = getopt_long(argc, argv, "hf:la:c:d:Uq",
3523                         long_options, NULL);
3524         if (c == -1) {
3525             break;
3526         }
3527         switch(c) {
3528         case 'h':
3529             cmd_help(ccmd, "[-f FMT | --image-opts] [-l | -a|-c|-d SNAPSHOT]\n"
3530 "        [-U] [-q] [--object OBJDEF] FILE\n"
3531 ,
3532 "  -f, --format FMT\n"
3533 "     specify FILE format explicitly (default: probing is used)\n"
3534 "  --image-opts\n"
3535 "     treat FILE as an option string (key=value,..), not a file name\n"
3536 "     (incompatible with -f|--format)\n"
3537 "  -l, --list\n"
3538 "     list snapshots in FILE (default action if no -l|-c|-a|-d is given)\n"
3539 "  -c, --create SNAPSHOT\n"
3540 "     create named snapshot\n"
3541 "  -a, --apply SNAPSHOT\n"
3542 "     apply named snapshot to the base\n"
3543 "  -d, --delete SNAPSHOT\n"
3544 "     delete named snapshot\n"
3545 "  (only one of -l|-c|-a|-d can be specified)\n"
3546 "  -U, --force-share\n"
3547 "     open image in shared mode for concurrent access\n"
3548 "  -q, --quiet\n"
3549 "     quiet mode (produce only error messages if any)\n"
3550 "  --object OBJDEF\n"
3551 "     defines QEMU user-creatable object\n"
3552 "  FILE\n"
3553 "     name of the image file, or option string (key=value,..)\n"
3554 "     with --image-opts) to operate on\n"
3555 );
3556             break;
3557         case 'f':
3558             fmt = optarg;
3559             break;
3560         case OPTION_IMAGE_OPTS:
3561             image_opts = true;
3562             break;
3563         case SNAPSHOT_LIST:
3564         case SNAPSHOT_APPLY:
3565         case SNAPSHOT_CREATE:
3566         case SNAPSHOT_DELETE:
3567             if (action) {
3568                 error_exit(argv[0], "Cannot mix '-l', '-a', '-c', '-d'");
3569                 return 0;
3570             }
3571             action = c;
3572             snapshot_name = optarg;
3573             break;
3574         case 'U':
3575             force_share = true;
3576             break;
3577         case 'q':
3578             quiet = true;
3579             break;
3580         case OPTION_OBJECT:
3581             user_creatable_process_cmdline(optarg);
3582             break;
3583         default:
3584             tryhelp(argv[0]);
3585         }
3586     }
3587 
3588     if (optind != argc - 1) {
3589         error_exit(argv[0], "Expecting one image file name");
3590     }
3591     filename = argv[optind++];
3592 
3593     if (!action) {
3594         action = SNAPSHOT_LIST;
3595     }
3596 
3597     /* Open the image */
3598     blk = img_open(image_opts, filename, fmt,
3599                    action == SNAPSHOT_LIST ? 0 : BDRV_O_RDWR,
3600                    false, quiet, force_share);
3601     if (!blk) {
3602         return 1;
3603     }
3604     bs = blk_bs(blk);
3605 
3606     /* Perform the requested action */
3607     switch(action) {
3608     case SNAPSHOT_LIST:
3609         dump_snapshots(bs);
3610         break;
3611 
3612     case SNAPSHOT_CREATE:
3613         memset(&sn, 0, sizeof(sn));
3614         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3615 
3616         rt = g_get_real_time();
3617         sn.date_sec = rt / G_USEC_PER_SEC;
3618         sn.date_nsec = (rt % G_USEC_PER_SEC) * 1000;
3619 
3620         bdrv_graph_rdlock_main_loop();
3621         ret = bdrv_snapshot_create(bs, &sn);
3622         bdrv_graph_rdunlock_main_loop();
3623 
3624         if (ret) {
3625             error_report("Could not create snapshot '%s': %s",
3626                 snapshot_name, strerror(-ret));
3627         }
3628         break;
3629 
3630     case SNAPSHOT_APPLY:
3631         ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3632         if (ret) {
3633             error_reportf_err(err, "Could not apply snapshot '%s': ",
3634                               snapshot_name);
3635         }
3636         break;
3637 
3638     case SNAPSHOT_DELETE:
3639         bdrv_drain_all_begin();
3640         bdrv_graph_rdlock_main_loop();
3641         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3642         if (ret < 0) {
3643             error_report("Could not delete snapshot '%s': snapshot not "
3644                          "found", snapshot_name);
3645             ret = 1;
3646         } else {
3647             ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3648             if (ret < 0) {
3649                 error_reportf_err(err, "Could not delete snapshot '%s': ",
3650                                   snapshot_name);
3651                 ret = 1;
3652             }
3653         }
3654         bdrv_graph_rdunlock_main_loop();
3655         bdrv_drain_all_end();
3656         break;
3657     }
3658 
3659     /* Cleanup */
3660     blk_unref(blk);
3661     if (ret) {
3662         return 1;
3663     }
3664     return 0;
3665 }
3666 
3667 static int img_rebase(const img_cmd_t *ccmd, int argc, char **argv)
3668 {
3669     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3670     uint8_t *buf_old = NULL;
3671     uint8_t *buf_new = NULL;
3672     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3673     BlockDriverState *unfiltered_bs, *unfiltered_bs_cow;
3674     BlockDriverInfo bdi = {0};
3675     char *filename;
3676     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3677     int c, flags, src_flags, ret;
3678     BdrvRequestFlags write_flags = 0;
3679     bool writethrough, src_writethrough;
3680     int unsafe = 0;
3681     bool force_share = false;
3682     int progress = 0;
3683     bool quiet = false;
3684     bool compress = false;
3685     Error *local_err = NULL;
3686     bool image_opts = false;
3687     int64_t write_align;
3688 
3689     /* Parse commandline parameters */
3690     fmt = NULL;
3691     cache = BDRV_DEFAULT_CACHE;
3692     src_cache = BDRV_DEFAULT_CACHE;
3693     out_baseimg = NULL;
3694     out_basefmt = NULL;
3695     for(;;) {
3696         static const struct option long_options[] = {
3697             {"help", no_argument, 0, 'h'},
3698             {"format", required_argument, 0, 'f'},
3699             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3700             {"cache", required_argument, 0, 't'},
3701             {"compress", no_argument, 0, 'c'},
3702             {"backing", required_argument, 0, 'b'},
3703             {"backing-format", required_argument, 0, 'B'},
3704             {"backing-cache", required_argument, 0, 'T'},
3705             {"backing-unsafe", no_argument, 0, 'u'},
3706             {"force-share", no_argument, 0, 'U'},
3707             {"progress", no_argument, 0, 'p'},
3708             {"quiet", no_argument, 0, 'q'},
3709             {"object", required_argument, 0, OPTION_OBJECT},
3710             {0, 0, 0, 0}
3711         };
3712         c = getopt_long(argc, argv, "hf:t:cb:F:B:T:uUpq",
3713                         long_options, NULL);
3714         if (c == -1) {
3715             break;
3716         }
3717         switch (c) {
3718         case 'h':
3719             cmd_help(ccmd, "[-f FMT | --image-opts] [-t CACHE]\n"
3720 "        [-b BACKING_FILE [-B BACKING_FMT] [-T BACKING_CACHE]] [-u]\n"
3721 "        [-c] [-U] [-p] [-q] [--object OBJDEF] FILE\n"
3722 ,
3723 "  -f, --format FMT\n"
3724 "     specify FILE format explicitly (default: probing is used)\n"
3725 "  --image-opts\n"
3726 "     treat FILE as an option string (key=value,..), not a file name\n"
3727 "     (incompatible with -f|--format)\n"
3728 "  -t, --cache CACHE\n"
3729 "     cache mode for FILE (default: " BDRV_DEFAULT_CACHE ")\n"
3730 "  -b, --backing BACKING_FILE|\"\"\n"
3731 "     rebase onto this file (specify empty name for no backing file)\n"
3732 "  -B, --backing-format BACKING_FMT (was -F in <=10.0)\n"
3733 "     specify format for BACKING_FILE explicitly (default: probing is used)\n"
3734 "  -T, --backing-cache CACHE\n"
3735 "     BACKING_FILE cache mode (default: " BDRV_DEFAULT_CACHE ")\n"
3736 "  -u, --backing-unsafe\n"
3737 "     do not fail if BACKING_FILE can not be read\n"
3738 "  -c, --compress\n"
3739 "     compress image (when image supports this)\n"
3740 "  -U, --force-share\n"
3741 "     open image in shared mode for concurrent access\n"
3742 "  -p, --progress\n"
3743 "     display progress information\n"
3744 "  -q, --quiet\n"
3745 "     quiet mode (produce only error messages if any)\n"
3746 "  --object OBJDEF\n"
3747 "     defines QEMU user-creatable object\n"
3748 "  FILE\n"
3749 "     name of the image file, or option string (key=value,..)\n"
3750 "     with --image-opts, to operate on\n"
3751 );
3752             return 0;
3753         case 'f':
3754             fmt = optarg;
3755             break;
3756         case OPTION_IMAGE_OPTS:
3757             image_opts = true;
3758             break;
3759         case 't':
3760             cache = optarg;
3761             break;
3762         case 'b':
3763             out_baseimg = optarg;
3764             break;
3765         case 'F': /* <=10.0 */
3766         case 'B':
3767             out_basefmt = optarg;
3768             break;
3769         case 'u':
3770             unsafe = 1;
3771             break;
3772         case 'c':
3773             compress = true;
3774             break;
3775         case 'U':
3776             force_share = true;
3777             break;
3778         case 'p':
3779             progress = 1;
3780             break;
3781         case 'T':
3782             src_cache = optarg;
3783             break;
3784         case 'q':
3785             quiet = true;
3786             break;
3787         case OPTION_OBJECT:
3788             user_creatable_process_cmdline(optarg);
3789             break;
3790         default:
3791             tryhelp(argv[0]);
3792         }
3793     }
3794 
3795     if (quiet) {
3796         progress = 0;
3797     }
3798 
3799     if (optind != argc - 1) {
3800         error_exit(argv[0], "Expecting one image file name");
3801     }
3802     if (!unsafe && !out_baseimg) {
3803         error_exit(argv[0],
3804                    "Must specify backing file (-b) or use unsafe mode (-u)");
3805     }
3806     filename = argv[optind++];
3807 
3808     qemu_progress_init(progress, 2.0);
3809     qemu_progress_print(0, 100);
3810 
3811     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3812     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3813     if (ret < 0) {
3814         error_report("Invalid cache option: %s", cache);
3815         goto out;
3816     }
3817 
3818     src_flags = 0;
3819     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3820     if (ret < 0) {
3821         error_report("Invalid source cache option: %s", src_cache);
3822         goto out;
3823     }
3824 
3825     /* The source files are opened read-only, don't care about WCE */
3826     assert((src_flags & BDRV_O_RDWR) == 0);
3827     (void) src_writethrough;
3828 
3829     /*
3830      * Open the images.
3831      *
3832      * Ignore the old backing file for unsafe rebase in case we want to correct
3833      * the reference to a renamed or moved backing file.
3834      */
3835     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3836                    false);
3837     if (!blk) {
3838         ret = -1;
3839         goto out;
3840     }
3841     bs = blk_bs(blk);
3842 
3843     bdrv_graph_rdlock_main_loop();
3844     unfiltered_bs = bdrv_skip_filters(bs);
3845     unfiltered_bs_cow = bdrv_cow_bs(unfiltered_bs);
3846     bdrv_graph_rdunlock_main_loop();
3847 
3848     if (compress && !block_driver_can_compress(unfiltered_bs->drv)) {
3849         error_report("Compression not supported for this file format");
3850         ret = -1;
3851         goto out;
3852     } else if (compress) {
3853         write_flags |= BDRV_REQ_WRITE_COMPRESSED;
3854     }
3855 
3856     if (out_basefmt != NULL) {
3857         if (bdrv_find_format(out_basefmt) == NULL) {
3858             error_report("Invalid format name: '%s'", out_basefmt);
3859             ret = -1;
3860             goto out;
3861         }
3862     }
3863 
3864     /*
3865      * We need overlay subcluster size (or cluster size in case writes are
3866      * compressed) to make sure write requests are aligned.
3867      */
3868     ret = bdrv_get_info(unfiltered_bs, &bdi);
3869     if (ret < 0) {
3870         error_report("could not get block driver info");
3871         goto out;
3872     } else if (bdi.subcluster_size == 0) {
3873         bdi.cluster_size = bdi.subcluster_size = 1;
3874     }
3875 
3876     write_align = compress ? bdi.cluster_size : bdi.subcluster_size;
3877 
3878     /* For safe rebasing we need to compare old and new backing file */
3879     if (!unsafe) {
3880         QDict *options = NULL;
3881         BlockDriverState *base_bs;
3882 
3883         bdrv_graph_rdlock_main_loop();
3884         base_bs = bdrv_cow_bs(unfiltered_bs);
3885         bdrv_graph_rdunlock_main_loop();
3886 
3887         if (base_bs) {
3888             blk_old_backing = blk_new(qemu_get_aio_context(),
3889                                       BLK_PERM_CONSISTENT_READ,
3890                                       BLK_PERM_ALL);
3891             ret = blk_insert_bs(blk_old_backing, base_bs,
3892                                 &local_err);
3893             if (ret < 0) {
3894                 error_reportf_err(local_err,
3895                                   "Could not reuse old backing file '%s': ",
3896                                   base_bs->filename);
3897                 goto out;
3898             }
3899         } else {
3900             blk_old_backing = NULL;
3901         }
3902 
3903         if (out_baseimg[0]) {
3904             const char *overlay_filename;
3905             char *out_real_path;
3906 
3907             options = qdict_new();
3908             if (out_basefmt) {
3909                 qdict_put_str(options, "driver", out_basefmt);
3910             }
3911             if (force_share) {
3912                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3913             }
3914 
3915             bdrv_graph_rdlock_main_loop();
3916             bdrv_refresh_filename(bs);
3917             bdrv_graph_rdunlock_main_loop();
3918             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3919                                                      : bs->filename;
3920             out_real_path =
3921                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
3922                                                              out_baseimg,
3923                                                              &local_err);
3924             if (local_err) {
3925                 qobject_unref(options);
3926                 error_reportf_err(local_err,
3927                                   "Could not resolve backing filename: ");
3928                 ret = -1;
3929                 goto out;
3930             }
3931 
3932             /*
3933              * Find out whether we rebase an image on top of a previous image
3934              * in its chain.
3935              */
3936             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3937             if (prefix_chain_bs) {
3938                 qobject_unref(options);
3939                 g_free(out_real_path);
3940 
3941                 blk_new_backing = blk_new(qemu_get_aio_context(),
3942                                           BLK_PERM_CONSISTENT_READ,
3943                                           BLK_PERM_ALL);
3944                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3945                                     &local_err);
3946                 if (ret < 0) {
3947                     error_reportf_err(local_err,
3948                                       "Could not reuse backing file '%s': ",
3949                                       out_baseimg);
3950                     goto out;
3951                 }
3952             } else {
3953                 blk_new_backing = blk_new_open(out_real_path, NULL,
3954                                                options, src_flags, &local_err);
3955                 g_free(out_real_path);
3956                 if (!blk_new_backing) {
3957                     error_reportf_err(local_err,
3958                                       "Could not open new backing file '%s': ",
3959                                       out_baseimg);
3960                     ret = -1;
3961                     goto out;
3962                 }
3963             }
3964         }
3965     }
3966 
3967     /*
3968      * Check each unallocated cluster in the COW file. If it is unallocated,
3969      * accesses go to the backing file. We must therefore compare this cluster
3970      * in the old and new backing file, and if they differ we need to copy it
3971      * from the old backing file into the COW file.
3972      *
3973      * If qemu-img crashes during this step, no harm is done. The content of
3974      * the image is the same as the original one at any time.
3975      */
3976     if (!unsafe) {
3977         int64_t size;
3978         int64_t old_backing_size = 0;
3979         int64_t new_backing_size = 0;
3980         uint64_t offset;
3981         int64_t n, n_old = 0, n_new = 0;
3982         float local_progress = 0;
3983 
3984         if (blk_old_backing && bdrv_opt_mem_align(blk_bs(blk_old_backing)) >
3985             bdrv_opt_mem_align(blk_bs(blk))) {
3986             buf_old = blk_blockalign(blk_old_backing, IO_BUF_SIZE);
3987         } else {
3988             buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3989         }
3990         buf_new = blk_blockalign(blk_new_backing, IO_BUF_SIZE);
3991 
3992         size = blk_getlength(blk);
3993         if (size < 0) {
3994             error_report("Could not get size of '%s': %s",
3995                          filename, strerror(-size));
3996             ret = -1;
3997             goto out;
3998         }
3999         if (blk_old_backing) {
4000             old_backing_size = blk_getlength(blk_old_backing);
4001             if (old_backing_size < 0) {
4002                 char backing_name[PATH_MAX];
4003 
4004                 bdrv_get_backing_filename(bs, backing_name,
4005                                           sizeof(backing_name));
4006                 error_report("Could not get size of '%s': %s",
4007                              backing_name, strerror(-old_backing_size));
4008                 ret = -1;
4009                 goto out;
4010             }
4011         }
4012         if (blk_new_backing) {
4013             new_backing_size = blk_getlength(blk_new_backing);
4014             if (new_backing_size < 0) {
4015                 error_report("Could not get size of '%s': %s",
4016                              out_baseimg, strerror(-new_backing_size));
4017                 ret = -1;
4018                 goto out;
4019             }
4020         }
4021 
4022         if (size != 0) {
4023             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
4024         }
4025 
4026         for (offset = 0; offset < size; offset += n) {
4027             bool old_backing_eof = false;
4028             int64_t n_alloc;
4029 
4030             /* How many bytes can we handle with the next read? */
4031             n = MIN(IO_BUF_SIZE, size - offset);
4032 
4033             /* If the cluster is allocated, we don't need to take action */
4034             ret = bdrv_is_allocated(unfiltered_bs, offset, n, &n);
4035             if (ret < 0) {
4036                 error_report("error while reading image metadata: %s",
4037                              strerror(-ret));
4038                 goto out;
4039             }
4040             if (ret) {
4041                 continue;
4042             }
4043 
4044             if (prefix_chain_bs) {
4045                 uint64_t bytes = n;
4046 
4047                 /*
4048                  * If cluster wasn't changed since prefix_chain, we don't need
4049                  * to take action
4050                  */
4051                 ret = bdrv_is_allocated_above(unfiltered_bs_cow,
4052                                               prefix_chain_bs, false,
4053                                               offset, n, &n);
4054                 if (ret < 0) {
4055                     error_report("error while reading image metadata: %s",
4056                                  strerror(-ret));
4057                     goto out;
4058                 }
4059                 if (!ret && n) {
4060                     continue;
4061                 }
4062                 if (!n) {
4063                     /*
4064                      * If we've reached EOF of the old backing, it means that
4065                      * offsets beyond the old backing size were read as zeroes.
4066                      * Now we will need to explicitly zero the cluster in
4067                      * order to preserve that state after the rebase.
4068                      */
4069                     n = bytes;
4070                 }
4071             }
4072 
4073             /*
4074              * At this point we know that the region [offset; offset + n)
4075              * is unallocated within the target image.  This region might be
4076              * unaligned to the target image's (sub)cluster boundaries, as
4077              * old backing may have smaller clusters (or have subclusters).
4078              * We extend it to the aligned boundaries to avoid CoW on
4079              * partial writes in blk_pwrite(),
4080              */
4081             n += offset - QEMU_ALIGN_DOWN(offset, write_align);
4082             offset = QEMU_ALIGN_DOWN(offset, write_align);
4083             n += QEMU_ALIGN_UP(offset + n, write_align) - (offset + n);
4084             n = MIN(n, MIN(size - offset, IO_BUF_SIZE));
4085             assert(!bdrv_is_allocated(unfiltered_bs, offset, n, &n_alloc) &&
4086                    n_alloc == n);
4087 
4088             /*
4089              * Much like with the target image, we'll try to read as much
4090              * of the old and new backings as we can.
4091              */
4092             n_old = MIN(n, MAX(0, old_backing_size - (int64_t) offset));
4093             n_new = MIN(n, MAX(0, new_backing_size - (int64_t) offset));
4094 
4095             /*
4096              * Read old and new backing file and take into consideration that
4097              * backing files may be smaller than the COW image.
4098              */
4099             memset(buf_old + n_old, 0, n - n_old);
4100             if (!n_old) {
4101                 old_backing_eof = true;
4102             } else {
4103                 ret = blk_pread(blk_old_backing, offset, n_old, buf_old, 0);
4104                 if (ret < 0) {
4105                     error_report("error while reading from old backing file");
4106                     goto out;
4107                 }
4108             }
4109 
4110             memset(buf_new + n_new, 0, n - n_new);
4111             if (n_new) {
4112                 ret = blk_pread(blk_new_backing, offset, n_new, buf_new, 0);
4113                 if (ret < 0) {
4114                     error_report("error while reading from new backing file");
4115                     goto out;
4116                 }
4117             }
4118 
4119             /* If they differ, we need to write to the COW file */
4120             uint64_t written = 0;
4121 
4122             while (written < n) {
4123                 int64_t pnum;
4124 
4125                 if (compare_buffers(buf_old + written, buf_new + written,
4126                                     n - written, write_align, &pnum))
4127                 {
4128                     if (old_backing_eof) {
4129                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
4130                     } else {
4131                         assert(written + pnum <= IO_BUF_SIZE);
4132                         ret = blk_pwrite(blk, offset + written, pnum,
4133                                          buf_old + written, write_flags);
4134                     }
4135                     if (ret < 0) {
4136                         error_report("Error while writing to COW image: %s",
4137                             strerror(-ret));
4138                         goto out;
4139                     }
4140                 }
4141 
4142                 written += pnum;
4143                 if (offset + written >= old_backing_size) {
4144                     old_backing_eof = true;
4145                 }
4146             }
4147             qemu_progress_print(local_progress, 100);
4148         }
4149     }
4150 
4151     /*
4152      * Change the backing file. All clusters that are different from the old
4153      * backing file are overwritten in the COW file now, so the visible content
4154      * doesn't change when we switch the backing file.
4155      */
4156     if (out_baseimg && *out_baseimg) {
4157         ret = bdrv_change_backing_file(unfiltered_bs, out_baseimg, out_basefmt,
4158                                        true);
4159     } else {
4160         ret = bdrv_change_backing_file(unfiltered_bs, NULL, NULL, false);
4161     }
4162 
4163     if (ret == -ENOSPC) {
4164         error_report("Could not change the backing file to '%s': No "
4165                      "space left in the file header", out_baseimg);
4166     } else if (ret == -EINVAL && out_baseimg && !out_basefmt) {
4167         error_report("Could not change the backing file to '%s': backing "
4168                      "format must be specified", out_baseimg);
4169     } else if (ret < 0) {
4170         error_report("Could not change the backing file to '%s': %s",
4171             out_baseimg, strerror(-ret));
4172     }
4173 
4174     qemu_progress_print(100, 0);
4175     /*
4176      * TODO At this point it is possible to check if any clusters that are
4177      * allocated in the COW file are the same in the backing file. If so, they
4178      * could be dropped from the COW file. Don't do this before switching the
4179      * backing file, in case of a crash this would lead to corruption.
4180      */
4181 out:
4182     qemu_progress_end();
4183     /* Cleanup */
4184     if (!unsafe) {
4185         blk_unref(blk_old_backing);
4186         blk_unref(blk_new_backing);
4187     }
4188     qemu_vfree(buf_old);
4189     qemu_vfree(buf_new);
4190 
4191     blk_unref(blk);
4192     if (ret) {
4193         return 1;
4194     }
4195     return 0;
4196 }
4197 
4198 static int img_resize(const img_cmd_t *ccmd, int argc, char **argv)
4199 {
4200     Error *err = NULL;
4201     int c, ret, relative;
4202     const char *filename = NULL, *fmt = NULL, *size = NULL;
4203     int64_t n, total_size, current_size;
4204     bool quiet = false;
4205     BlockBackend *blk = NULL;
4206     PreallocMode prealloc = PREALLOC_MODE_OFF;
4207     QemuOpts *param;
4208 
4209     static QemuOptsList resize_options = {
4210         .name = "resize_options",
4211         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
4212         .desc = {
4213             {
4214                 .name = BLOCK_OPT_SIZE,
4215                 .type = QEMU_OPT_SIZE,
4216                 .help = "Virtual disk size"
4217             }, {
4218                 /* end of list */
4219             }
4220         },
4221     };
4222     bool image_opts = false;
4223     bool shrink = false;
4224 
4225     /* Parse getopt arguments */
4226     for(;;) {
4227         static const struct option long_options[] = {
4228             {"help", no_argument, 0, 'h'},
4229             {"format", required_argument, 0, 'f'},
4230             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4231             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
4232             {"shrink", no_argument, 0, OPTION_SHRINK},
4233             {"quiet", no_argument, 0, 'q'},
4234             {"object", required_argument, 0, OPTION_OBJECT},
4235             {0, 0, 0, 0}
4236         };
4237         c = getopt_long(argc, argv, "-hf:q",
4238                         long_options, NULL);
4239         if (c == -1) {
4240             break;
4241         }
4242         switch(c) {
4243         case 'h':
4244             cmd_help(ccmd, "[-f FMT | --image-opts] [--preallocation PREALLOC] [--shrink]\n"
4245 "        [-q] [--object OBJDEF] FILE [+-]SIZE[bkKMGTPE]\n"
4246 ,
4247 "  -f, --format FMT\n"
4248 "     specify FILE format explicitly (default: probing is used)\n"
4249 "  --image-opts\n"
4250 "     treat FILE as an option string (key=value,...), not a file name\n"
4251 "     (incompatible with -f|--format)\n"
4252 "  --shrink\n"
4253 "     allow operation when the new size is smaller than the original\n"
4254 "  --preallocation PREALLOC\n"
4255 "     specify FMT-specific preallocation type for the new areas\n"
4256 "  -q, --quiet\n"
4257 "     quiet mode (produce only error messages if any)\n"
4258 "  --object OBJDEF\n"
4259 "     defines QEMU user-creatable object\n"
4260 "  FILE\n"
4261 "     name of the image file, or option string (key=value,..)\n"
4262 "     with --image-opts, to operate on\n"
4263 "  [+-]SIZE[bkKMGTPE]\n"
4264 "     new image size or amount by which to shrink (-)/grow (+),\n"
4265 "     with optional multiplier suffix (powers of 1024, default is bytes)\n"
4266 );
4267             return 0;
4268         case 'f':
4269             fmt = optarg;
4270             break;
4271         case OPTION_IMAGE_OPTS:
4272             image_opts = true;
4273             break;
4274         case OPTION_PREALLOCATION:
4275             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
4276                                        PREALLOC_MODE__MAX, NULL);
4277             if (prealloc == PREALLOC_MODE__MAX) {
4278                 error_report("Invalid preallocation mode '%s'", optarg);
4279                 return 1;
4280             }
4281             break;
4282         case OPTION_SHRINK:
4283             shrink = true;
4284             break;
4285         case 'q':
4286             quiet = true;
4287             break;
4288         case OPTION_OBJECT:
4289             user_creatable_process_cmdline(optarg);
4290             break;
4291         case 1: /* a non-optional argument */
4292             if (!filename) {
4293                 filename = optarg;
4294                 /* see if we have -size (number) next to filename */
4295                 if (optind < argc) {
4296                     size = argv[optind];
4297                     if (size[0] == '-' && size[1] >= '0' && size[1] <= '9') {
4298                         ++optind;
4299                     } else {
4300                         size = NULL;
4301                     }
4302                 }
4303             } else if (!size) {
4304                 size = optarg;
4305             } else {
4306                 error_exit(argv[0], "Extra argument(s) in command line");
4307             }
4308             break;
4309         default:
4310             tryhelp(argv[0]);
4311         }
4312     }
4313     if (!filename && optind < argc) {
4314         filename = argv[optind++];
4315     }
4316     if (!size && optind < argc) {
4317         size = argv[optind++];
4318     }
4319     if (!filename || !size || optind < argc) {
4320         error_exit(argv[0], "Expecting image file name and size");
4321     }
4322 
4323     /* Choose grow, shrink, or absolute resize mode */
4324     switch (size[0]) {
4325     case '+':
4326         relative = 1;
4327         size++;
4328         break;
4329     case '-':
4330         relative = -1;
4331         size++;
4332         break;
4333     default:
4334         relative = 0;
4335         break;
4336     }
4337 
4338     /* Parse size */
4339     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
4340     if (!qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err)) {
4341         error_report_err(err);
4342         ret = -1;
4343         qemu_opts_del(param);
4344         goto out;
4345     }
4346     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
4347     qemu_opts_del(param);
4348 
4349     blk = img_open(image_opts, filename, fmt,
4350                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
4351                    false);
4352     if (!blk) {
4353         ret = -1;
4354         goto out;
4355     }
4356 
4357     current_size = blk_getlength(blk);
4358     if (current_size < 0) {
4359         error_report("Failed to inquire current image length: %s",
4360                      strerror(-current_size));
4361         ret = -1;
4362         goto out;
4363     }
4364 
4365     if (relative) {
4366         total_size = current_size + n * relative;
4367     } else {
4368         total_size = n;
4369     }
4370     if (total_size <= 0) {
4371         error_report("New image size must be positive");
4372         ret = -1;
4373         goto out;
4374     }
4375 
4376     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
4377         error_report("Preallocation can only be used for growing images");
4378         ret = -1;
4379         goto out;
4380     }
4381 
4382     if (total_size < current_size && !shrink) {
4383         error_report("Use the --shrink option to perform a shrink operation.");
4384         warn_report("Shrinking an image will delete all data beyond the "
4385                     "shrunken image's end. Before performing such an "
4386                     "operation, make sure there is no important data there.");
4387         ret = -1;
4388         goto out;
4389     }
4390 
4391     /*
4392      * The user expects the image to have the desired size after
4393      * resizing, so pass @exact=true.  It is of no use to report
4394      * success when the image has not actually been resized.
4395      */
4396     ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
4397     if (!ret) {
4398         qprintf(quiet, "Image resized.\n");
4399     } else {
4400         error_report_err(err);
4401     }
4402 out:
4403     blk_unref(blk);
4404     if (ret) {
4405         return 1;
4406     }
4407     return 0;
4408 }
4409 
4410 static void amend_status_cb(BlockDriverState *bs,
4411                             int64_t offset, int64_t total_work_size,
4412                             void *opaque)
4413 {
4414     qemu_progress_print(100.f * offset / total_work_size, 0);
4415 }
4416 
4417 static int print_amend_option_help(const char *format)
4418 {
4419     BlockDriver *drv;
4420 
4421     GRAPH_RDLOCK_GUARD_MAINLOOP();
4422 
4423     /* Find driver and parse its options */
4424     drv = bdrv_find_format(format);
4425     if (!drv) {
4426         error_report("Unknown file format '%s'", format);
4427         return 1;
4428     }
4429 
4430     if (!drv->bdrv_amend_options) {
4431         error_report("Format driver '%s' does not support option amendment",
4432                      format);
4433         return 1;
4434     }
4435 
4436     /* Every driver supporting amendment must have amend_opts */
4437     assert(drv->amend_opts);
4438 
4439     printf("Amend options for '%s':\n", format);
4440     qemu_opts_print_help(drv->amend_opts, false);
4441     return 0;
4442 }
4443 
4444 static int img_amend(const img_cmd_t *ccmd, int argc, char **argv)
4445 {
4446     Error *err = NULL;
4447     int c, ret = 0;
4448     char *options = NULL;
4449     QemuOptsList *amend_opts = NULL;
4450     QemuOpts *opts = NULL;
4451     const char *fmt = NULL, *filename, *cache;
4452     int flags;
4453     bool writethrough;
4454     bool quiet = false, progress = false;
4455     BlockBackend *blk = NULL;
4456     BlockDriverState *bs = NULL;
4457     bool image_opts = false;
4458     bool force = false;
4459 
4460     cache = BDRV_DEFAULT_CACHE;
4461     for (;;) {
4462         static const struct option long_options[] = {
4463             {"help", no_argument, 0, 'h'},
4464             {"options", required_argument, 0, 'o'},
4465             {"format", required_argument, 0, 'f'},
4466             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4467             {"cache", required_argument, 0, 't'},
4468             {"force", no_argument, 0, OPTION_FORCE},
4469             {"progress", no_argument, 0, 'p'},
4470             {"quiet", no_argument, 0, 'q'},
4471             {"object", required_argument, 0, OPTION_OBJECT},
4472             {0, 0, 0, 0}
4473         };
4474         c = getopt_long(argc, argv, "ho:f:t:pq",
4475                         long_options, NULL);
4476         if (c == -1) {
4477             break;
4478         }
4479 
4480         switch (c) {
4481         case 'h':
4482             cmd_help(ccmd, "-o FMT_OPTS [-f FMT | --image-opts]\n"
4483 "        [-t CACHE] [--force] [-p] [-q] [--object OBJDEF] FILE\n"
4484 ,
4485 "  -o, --options FMT_OPTS\n"
4486 "     FMT-specfic format options (required)\n"
4487 "  -f, --format FMT\n"
4488 "     specify FILE format explicitly (default: probing is used)\n"
4489 "  --image-opts\n"
4490 "     treat FILE as an option string (key=value,..), not a file name\n"
4491 "     (incompatible with -f|--format)\n"
4492 "  -t, --cache CACHE\n"
4493 "     cache mode for FILE (default: " BDRV_DEFAULT_CACHE ")\n"
4494 "  --force\n"
4495 "     allow certain unsafe operations\n"
4496 "  -p, --progres\n"
4497 "     show operation progress\n"
4498 "  -q, --quiet\n"
4499 "     quiet mode (produce only error messages if any)\n"
4500 "  --object OBJDEF\n"
4501 "     defines QEMU user-creatable object\n"
4502 "  FILE\n"
4503 "     name of the image file, or option string (key=value,..)\n"
4504 "     with --image-opts, to operate on\n"
4505 );
4506             break;
4507         case 'o':
4508             if (accumulate_options(&options, optarg) < 0) {
4509                 ret = -1;
4510                 goto out_no_progress;
4511             }
4512             break;
4513         case 'f':
4514             fmt = optarg;
4515             break;
4516         case OPTION_IMAGE_OPTS:
4517             image_opts = true;
4518             break;
4519         case 't':
4520             cache = optarg;
4521             break;
4522         case OPTION_FORCE:
4523             force = true;
4524             break;
4525         case 'p':
4526             progress = true;
4527             break;
4528         case 'q':
4529             quiet = true;
4530             break;
4531         case OPTION_OBJECT:
4532             user_creatable_process_cmdline(optarg);
4533             break;
4534         default:
4535             tryhelp(argv[0]);
4536         }
4537     }
4538 
4539     if (!options) {
4540         error_exit(argv[0], "Must specify options (-o)");
4541     }
4542 
4543     if (quiet) {
4544         progress = false;
4545     }
4546     qemu_progress_init(progress, 1.0);
4547 
4548     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4549     if (fmt && has_help_option(options)) {
4550         /* If a format is explicitly specified (and possibly no filename is
4551          * given), print option help here */
4552         ret = print_amend_option_help(fmt);
4553         goto out;
4554     }
4555 
4556     if (optind != argc - 1) {
4557         error_report("Expecting one image file name");
4558         ret = -1;
4559         goto out;
4560     }
4561 
4562     flags = BDRV_O_RDWR;
4563     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4564     if (ret < 0) {
4565         error_report("Invalid cache option: %s", cache);
4566         goto out;
4567     }
4568 
4569     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4570                    false);
4571     if (!blk) {
4572         ret = -1;
4573         goto out;
4574     }
4575     bs = blk_bs(blk);
4576 
4577     fmt = bs->drv->format_name;
4578 
4579     if (has_help_option(options)) {
4580         /* If the format was auto-detected, print option help here */
4581         ret = print_amend_option_help(fmt);
4582         goto out;
4583     }
4584 
4585     bdrv_graph_rdlock_main_loop();
4586     if (!bs->drv->bdrv_amend_options) {
4587         error_report("Format driver '%s' does not support option amendment",
4588                      fmt);
4589         bdrv_graph_rdunlock_main_loop();
4590         ret = -1;
4591         goto out;
4592     }
4593 
4594     /* Every driver supporting amendment must have amend_opts */
4595     assert(bs->drv->amend_opts);
4596 
4597     amend_opts = qemu_opts_append(amend_opts, bs->drv->amend_opts);
4598     opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4599     if (!qemu_opts_do_parse(opts, options, NULL, &err)) {
4600         qemu_opts_del(opts);
4601         /* Try to parse options using the create options */
4602         amend_opts = qemu_opts_append(amend_opts, bs->drv->create_opts);
4603         opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4604         if (qemu_opts_do_parse(opts, options, NULL, NULL)) {
4605             error_append_hint(&err,
4606                               "This option is only supported for image creation\n");
4607         }
4608 
4609         bdrv_graph_rdunlock_main_loop();
4610         error_report_err(err);
4611         ret = -1;
4612         goto out;
4613     }
4614 
4615     /* In case the driver does not call amend_status_cb() */
4616     qemu_progress_print(0.f, 0);
4617     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, force, &err);
4618     qemu_progress_print(100.f, 0);
4619     bdrv_graph_rdunlock_main_loop();
4620 
4621     if (ret < 0) {
4622         error_report_err(err);
4623         goto out;
4624     }
4625 
4626 out:
4627     qemu_progress_end();
4628 
4629 out_no_progress:
4630     blk_unref(blk);
4631     qemu_opts_del(opts);
4632     qemu_opts_free(amend_opts);
4633     g_free(options);
4634 
4635     if (ret) {
4636         return 1;
4637     }
4638     return 0;
4639 }
4640 
/*
 * State of one "bench" run, shared between img_bench() (which fills in the
 * configuration) and the AIO completion callback bench_cb() (which updates
 * the bookkeeping fields at the bottom).
 */
typedef struct BenchData {
    BlockBackend *blk;          /* image the requests are issued against */
    uint64_t image_size;        /* length of the image, from blk_getlength() */
    bool write;                 /* true: write test, false: read test */
    int bufsize;                /* size of each I/O request in bytes */
    int step;                   /* offset increment between requests */
    int nrreq;                  /* maximum number of requests in flight */
    int n;                      /* requests not yet completed */
    int flush_interval;         /* flush every this many requests; 0 = never */
    bool drain_on_flush;        /* wait for in-flight requests before flushing */
    uint8_t *buf;               /* data buffer of nrreq * bufsize bytes */
    QEMUIOVector *qiov;         /* array of nrreq single-element I/O vectors */

    int in_flight;              /* requests submitted but not yet completed */
    bool in_flush;              /* a drained flush is currently pending */
    uint64_t offset;            /* offset of the next request to submit */
} BenchData;
4658 
/*
 * Completion callback for flushes issued without draining the queue.
 * Nothing has to be scheduled afterwards; a failed flush terminates the
 * program.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4666 
/*
 * Completion callback driving the benchmark.
 *
 * @opaque is the BenchData of the run, @ret the result of the request (or
 * drained flush) that just finished.  img_bench() also calls this once
 * directly with @ret == 0 to submit the first batch of requests.
 *
 * Any failed request terminates the program.  Otherwise the bookkeeping
 * for the completed request is updated, a flush is issued when one is due,
 * and new requests are submitted until either all remaining requests are
 * in flight or the queue depth (nrreq) is reached.
 */
static void bench_cb(void *opaque, int ret)
{
    BenchData *b = opaque;
    BlockAIOCB *acb;

    if (ret < 0) {
        error_report("Failed request: %s", strerror(-ret));
        exit(EXIT_FAILURE);
    }

    if (b->in_flush) {
        /* Just finished a flush with drained queue: Start next requests */
        assert(b->in_flight == 0);
        b->in_flush = false;
    } else if (b->in_flight > 0) {
        /* Computed before the counters are decremented below */
        int remaining = b->n - b->in_flight;

        b->n--;
        b->in_flight--;

        /* Time for flush? Drain queue if requested, then flush */
        if (b->flush_interval && remaining % b->flush_interval == 0) {
            /*
             * With draining, the flush is only issued once the last
             * in-flight request has completed; without it, immediately.
             */
            if (!b->in_flight || !b->drain_on_flush) {
                BlockCompletionFunc *cb;

                if (b->drain_on_flush) {
                    /* Re-enter bench_cb on completion to resume submitting */
                    b->in_flush = true;
                    cb = bench_cb;
                } else {
                    cb = bench_undrained_flush_cb;
                }

                acb = blk_aio_flush(b->blk, cb, b);
                if (!acb) {
                    error_report("Failed to issue flush request");
                    exit(EXIT_FAILURE);
                }
            }
            /* While draining, do not submit anything new */
            if (b->drain_on_flush) {
                return;
            }
        }
    }

    while (b->n > b->in_flight && b->in_flight < b->nrreq) {
        int64_t offset = b->offset;
        /* blk_aio_* might look for completed I/Os and kick bench_cb
         * again, so make sure this operation is counted by in_flight
         * and b->offset is ready for the next submission.
         */
        b->in_flight++;
        b->offset += b->step;
        /* Wrap around so that a full buffer still fits into the image */
        if (b->image_size <= b->bufsize) {
            b->offset = 0;
        } else {
            b->offset %= b->image_size - b->bufsize;
        }
        /*
         * NOTE(review): every request is submitted with b->qiov, i.e. the
         * first element of the qiov array set up by img_bench() - confirm
         * that sharing one I/O vector between parallel requests is intended.
         */
        if (b->write) {
            acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
        } else {
            acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
        }
        if (!acb) {
            error_report("Failed to issue request");
            exit(EXIT_FAILURE);
        }
    }
}
4735 
4736 static int img_bench(const img_cmd_t *ccmd, int argc, char **argv)
4737 {
4738     int c, ret = 0;
4739     const char *fmt = NULL, *filename;
4740     bool quiet = false;
4741     bool image_opts = false;
4742     bool is_write = false;
4743     int count = 75000;
4744     int depth = 64;
4745     int64_t offset = 0;
4746     ssize_t bufsize = 4096;
4747     int pattern = 0;
4748     ssize_t step = 0;
4749     int flush_interval = 0;
4750     bool drain_on_flush = true;
4751     int64_t image_size;
4752     BlockBackend *blk = NULL;
4753     BenchData data = {};
4754     int flags = 0;
4755     bool writethrough = false;
4756     struct timeval t1, t2;
4757     int i;
4758     bool force_share = false;
4759     size_t buf_size = 0;
4760 
4761     for (;;) {
4762         static const struct option long_options[] = {
4763             {"help", no_argument, 0, 'h'},
4764             {"format", required_argument, 0, 'f'},
4765             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4766             {"cache", required_argument, 0, 't'},
4767             {"count", required_argument, 0, 'c'},
4768             {"depth", required_argument, 0, 'd'},
4769             {"offset", required_argument, 0, 'o'},
4770             {"buffer-size", required_argument, 0, 's'},
4771             {"step-size", required_argument, 0, 'S'},
4772             {"write", no_argument, 0, 'w'},
4773             {"pattern", required_argument, 0, OPTION_PATTERN},
4774             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4775             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4776             {"aio", required_argument, 0, 'i'},
4777             {"native", no_argument, 0, 'n'},
4778             {"force-share", no_argument, 0, 'U'},
4779             {"quiet", no_argument, 0, 'q'},
4780             {"object", required_argument, 0, OPTION_OBJECT},
4781             {0, 0, 0, 0}
4782         };
4783         c = getopt_long(argc, argv, "hf:t:c:d:o:s:S:wi:nUq",
4784                         long_options, NULL);
4785         if (c == -1) {
4786             break;
4787         }
4788 
4789         switch (c) {
4790         case 'h':
4791             cmd_help(ccmd, "[-f FMT | --image-opts] [-t CACHE]\n"
4792 "        [-c COUNT] [-d DEPTH] [-o OFFSET] [-s BUFFER_SIZE] [-S STEP_SIZE]\n"
4793 "        [-w [--pattern PATTERN] [--flush-interval INTERVAL [--no-drain]]]\n"
4794 "        [-i AIO] [-n] [-U] [-q] FILE\n"
4795 ,
4796 "  -f, --format FMT\n"
4797 "     specify FILE format explicitly\n"
4798 "  --image-opts\n"
4799 "     indicates that FILE is a complete image specification\n"
4800 "     instead of a file name (incompatible with --format)\n"
4801 "  -t, --cache CACHE\n"
4802 "     cache mode for FILE (default: " BDRV_DEFAULT_CACHE ")\n"
4803 "  -c, --count COUNT\n"
4804 "     number of I/O requests to perform\n"
4805 "  -d, --depth DEPTH\n"
4806 "     number of requests to perform in parallel\n"
4807 "  -o, --offset OFFSET\n"
4808 "     start first request at this OFFSET\n"
4809 "  -s, --buffer-size BUFFER_SIZE[bkKMGTPE]\n"
4810 "     size of each I/O request, with optional multiplier suffix\n"
4811 "     (powers of 1024, default is 4K)\n"
4812 "  -S, --step-size STEP_SIZE[bkKMGTPE]\n"
4813 "     each next request offset increment, with optional multiplier suffix\n"
4814 "     (powers of 1024, default is the same as BUFFER_SIZE)\n"
4815 "  -w, --write\n"
4816 "     perform write test (default is read)\n"
4817 "  --pattern PATTERN\n"
4818 "     write this pattern byte instead of zero\n"
4819 "  --flush-interval FLUSH_INTERVAL\n"
4820 "     issue flush after this number of requests\n"
4821 "  --no-drain\n"
4822 "     do not wait when flushing pending requests\n"
4823 "  -i, --aio AIO\n"
4824 "     async-io backend (threads, native, io_uring)\n"
4825 "  -n, --native\n"
4826 "     use native AIO backend if possible\n"
4827 "  -U, --force-share\n"
4828 "     open images in shared mode for concurrent access\n"
4829 "  -q, --quiet\n"
4830 "     quiet mode (produce only error messages if any)\n"
4831 "  --object OBJDEF\n"
4832 "     defines QEMU user-creatable object\n"
4833 "  FILE\n"
4834 "     name of the image file, or option string (key=value,..)\n"
4835 "     with --image-opts, to operate on\n"
4836 );
4837             break;
4838         case 'f':
4839             fmt = optarg;
4840             break;
4841         case OPTION_IMAGE_OPTS:
4842             image_opts = true;
4843             break;
4844         case 't':
4845             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4846             if (ret < 0) {
4847                 error_report("Invalid cache mode");
4848                 ret = -1;
4849                 goto out;
4850             }
4851             break;
4852         case 'c':
4853             count = cvtnum_full("request count", optarg, false, 1, INT_MAX);
4854             if (count < 0) {
4855                 return 1;
4856             }
4857             break;
4858         case 'd':
4859             depth = cvtnum_full("queue depth", optarg, false, 1, INT_MAX);
4860             if (depth < 0) {
4861                 return 1;
4862             }
4863             break;
4864         case 'n':
4865             flags |= BDRV_O_NATIVE_AIO;
4866             break;
4867         case 'i':
4868             ret = bdrv_parse_aio(optarg, &flags);
4869             if (ret < 0) {
4870                 error_report("Invalid aio option: %s", optarg);
4871                 ret = -1;
4872                 goto out;
4873             }
4874             break;
4875         case 'o':
4876             offset = cvtnum("offset", optarg, true);
4877             if (offset < 0) {
4878                 return 1;
4879             }
4880             break;
4881         case 's':
4882             bufsize = cvtnum_full("buffer size", optarg, true, 1, INT_MAX);
4883             if (bufsize < 0) {
4884                 return 1;
4885             }
4886             break;
4887         case 'S':
4888             step = cvtnum_full("step size", optarg, true, 0, INT_MAX);
4889             if (step < 0) {
4890                 return 1;
4891             }
4892             break;
4893         case 'w':
4894             flags |= BDRV_O_RDWR;
4895             is_write = true;
4896             break;
4897         case OPTION_PATTERN:
4898             pattern = cvtnum_full("pattern byte", optarg, false, 0, 0xff);
4899             if (pattern < 0) {
4900                 return 1;
4901             }
4902             break;
4903         case OPTION_FLUSH_INTERVAL:
4904             flush_interval = cvtnum_full("flush interval", optarg,
4905                                          false, 0, INT_MAX);
4906             if (flush_interval < 0) {
4907                 return 1;
4908             }
4909             break;
4910         case OPTION_NO_DRAIN:
4911             drain_on_flush = false;
4912             break;
4913         case 'U':
4914             force_share = true;
4915             break;
4916         case 'q':
4917             quiet = true;
4918             break;
4919         case OPTION_OBJECT:
4920             user_creatable_process_cmdline(optarg);
4921             break;
4922         default:
4923             tryhelp(argv[0]);
4924         }
4925     }
4926 
4927     if (optind != argc - 1) {
4928         error_exit(argv[0], "Expecting one image file name");
4929     }
4930     filename = argv[argc - 1];
4931 
4932     if (!is_write && flush_interval) {
4933         error_report("--flush-interval is only available in write tests");
4934         ret = -1;
4935         goto out;
4936     }
4937     if (flush_interval && flush_interval < depth) {
4938         error_report("Flush interval can't be smaller than depth");
4939         ret = -1;
4940         goto out;
4941     }
4942 
4943     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4944                    force_share);
4945     if (!blk) {
4946         ret = -1;
4947         goto out;
4948     }
4949 
4950     image_size = blk_getlength(blk);
4951     if (image_size < 0) {
4952         ret = image_size;
4953         goto out;
4954     }
4955 
4956     data = (BenchData) {
4957         .blk            = blk,
4958         .image_size     = image_size,
4959         .bufsize        = bufsize,
4960         .step           = step ?: bufsize,
4961         .nrreq          = depth,
4962         .n              = count,
4963         .offset         = offset,
4964         .write          = is_write,
4965         .flush_interval = flush_interval,
4966         .drain_on_flush = drain_on_flush,
4967     };
4968     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4969            "(starting at offset %" PRId64 ", step size %d)\n",
4970            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4971            data.offset, data.step);
4972     if (flush_interval) {
4973         printf("Sending flush every %d requests\n", flush_interval);
4974     }
4975 
4976     buf_size = data.nrreq * data.bufsize;
4977     data.buf = blk_blockalign(blk, buf_size);
4978     memset(data.buf, pattern, data.nrreq * data.bufsize);
4979 
4980     blk_register_buf(blk, data.buf, buf_size, &error_fatal);
4981 
4982     data.qiov = g_new(QEMUIOVector, data.nrreq);
4983     for (i = 0; i < data.nrreq; i++) {
4984         qemu_iovec_init(&data.qiov[i], 1);
4985         qemu_iovec_add(&data.qiov[i],
4986                        data.buf + i * data.bufsize, data.bufsize);
4987     }
4988 
4989     gettimeofday(&t1, NULL);
4990     bench_cb(&data, 0);
4991 
4992     while (data.n > 0) {
4993         main_loop_wait(false);
4994     }
4995     gettimeofday(&t2, NULL);
4996 
4997     printf("Run completed in %3.3f seconds.\n",
4998            (t2.tv_sec - t1.tv_sec)
4999            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
5000 
5001 out:
5002     if (data.buf) {
5003         blk_unregister_buf(blk, data.buf, buf_size);
5004     }
5005     qemu_vfree(data.buf);
5006     blk_unref(blk);
5007 
5008     if (ret) {
5009         return 1;
5010     }
5011     return 0;
5012 }
5013 
/* Operations the "bitmap" subcommand can perform on a dirty bitmap */
enum ImgBitmapAct {
    BITMAP_ADD,
    BITMAP_REMOVE,
    BITMAP_CLEAR,
    BITMAP_ENABLE,
    BITMAP_DISABLE,
    BITMAP_MERGE,
};
/*
 * One requested bitmap operation.  img_bitmap() queues one of these per
 * action option on the command line and executes them in order.
 */
typedef struct ImgBitmapAction {
    enum ImgBitmapAct act;
    const char *src; /* only used for merge */
    QSIMPLEQ_ENTRY(ImgBitmapAction) next;
} ImgBitmapAction;
5027 
5028 static int img_bitmap(const img_cmd_t *ccmd, int argc, char **argv)
5029 {
5030     Error *err = NULL;
5031     int c, ret = 1;
5032     QemuOpts *opts = NULL;
5033     const char *fmt = NULL, *src_fmt = NULL, *src_filename = NULL;
5034     const char *filename, *bitmap;
5035     BlockBackend *blk = NULL, *src = NULL;
5036     BlockDriverState *bs = NULL, *src_bs = NULL;
5037     bool image_opts = false;
5038     int64_t granularity = 0;
5039     bool add = false, merge = false;
5040     QSIMPLEQ_HEAD(, ImgBitmapAction) actions;
5041     ImgBitmapAction *act, *act_next;
5042     const char *op;
5043     int inactivate_ret;
5044 
5045     QSIMPLEQ_INIT(&actions);
5046 
5047     for (;;) {
5048         static const struct option long_options[] = {
5049             {"help", no_argument, 0, 'h'},
5050             {"format", required_argument, 0, 'f'},
5051             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5052             {"add", no_argument, 0, OPTION_ADD},
5053             {"granularity", required_argument, 0, 'g'},
5054             {"remove", no_argument, 0, OPTION_REMOVE},
5055             {"clear", no_argument, 0, OPTION_CLEAR},
5056             {"enable", no_argument, 0, OPTION_ENABLE},
5057             {"disable", no_argument, 0, OPTION_DISABLE},
5058             {"merge", required_argument, 0, OPTION_MERGE},
5059             {"source-file", required_argument, 0, 'b'},
5060             {"source-format", required_argument, 0, 'F'},
5061             {"object", required_argument, 0, OPTION_OBJECT},
5062             {0, 0, 0, 0}
5063         };
5064         c = getopt_long(argc, argv, "hf:g:b:F:",
5065                         long_options, NULL);
5066         if (c == -1) {
5067             break;
5068         }
5069 
5070         switch (c) {
5071         case 'h':
5072             cmd_help(ccmd, "[-f FMT | --image-opts]\n"
5073 "        ( --add [-g SIZE] | --remove | --clear | --enable | --disable |\n"
5074 "          --merge SOURCE [-b SRC_FILE [-F SRC_FMT]] )..\n"
5075 "        [--object OBJDEF] FILE BITMAP\n"
5076 ,
5077 "  -f, --format FMT\n"
5078 "     specify FILE format explicitly (default: probing is used)\n"
5079 "  --image-opts\n"
5080 "     treat FILE as an option string (key=value,..), not a file name\n"
5081 "     (incompatible with -f|--format)\n"
5082 "  --add\n"
5083 "     creates BITMAP in FILE, enables to record future edits\n"
5084 "  -g, --granularity SIZE[bKMGTPE]\n"
5085 "     sets non-default granularity for the bitmap being added,\n"
5086 "     with optional multiplier suffix (in powers of 1024)\n"
5087 "  --remove\n"
5088 "     removes BITMAP from FILE\n"
5089 "  --clear\n"
5090 "     clears BITMAP in FILE\n"
5091 "  --enable, --disable\n"
5092 "     starts and stops recording future edits to BITMAP in FILE\n"
5093 "  --merge SOURCE\n"
5094 "     merges contents of the SOURCE bitmap into BITMAP in FILE\n"
5095 "  -b, --source-file SRC_FILE\n"
5096 "     select alternative source file for --merge\n"
5097 "  -F, --source-format SRC_FMT\n"
5098 "     specify format for SRC_FILE explicitly\n"
5099 "  --object OBJDEF\n"
5100 "     defines QEMU user-creatable object\n"
5101 "  FILE\n"
5102 "     name of the image file, or option string (key=value,..)\n"
5103 "     with --image-opts, to operate on\n"
5104 "  BITMAP\n"
5105 "     name of the bitmap to add, remove, clear, enable, disable or merge to\n"
5106 );
5107             break;
5108         case 'f':
5109             fmt = optarg;
5110             break;
5111         case OPTION_IMAGE_OPTS:
5112             image_opts = true;
5113             break;
5114         case OPTION_ADD:
5115             act = g_new0(ImgBitmapAction, 1);
5116             act->act = BITMAP_ADD;
5117             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5118             add = true;
5119             break;
5120         case 'g':
5121             granularity = cvtnum("granularity", optarg, true);
5122             if (granularity < 0) {
5123                 return 1;
5124             }
5125             break;
5126         case OPTION_REMOVE:
5127             act = g_new0(ImgBitmapAction, 1);
5128             act->act = BITMAP_REMOVE;
5129             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5130             break;
5131         case OPTION_CLEAR:
5132             act = g_new0(ImgBitmapAction, 1);
5133             act->act = BITMAP_CLEAR;
5134             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5135             break;
5136         case OPTION_ENABLE:
5137             act = g_new0(ImgBitmapAction, 1);
5138             act->act = BITMAP_ENABLE;
5139             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5140             break;
5141         case OPTION_DISABLE:
5142             act = g_new0(ImgBitmapAction, 1);
5143             act->act = BITMAP_DISABLE;
5144             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5145             break;
5146         case OPTION_MERGE:
5147             act = g_new0(ImgBitmapAction, 1);
5148             act->act = BITMAP_MERGE;
5149             act->src = optarg;
5150             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5151             merge = true;
5152             break;
5153         case 'b':
5154             src_filename = optarg;
5155             break;
5156         case 'F':
5157             src_fmt = optarg;
5158             break;
5159         case OPTION_OBJECT:
5160             user_creatable_process_cmdline(optarg);
5161             break;
5162         default:
5163             tryhelp(argv[0]);
5164         }
5165     }
5166 
5167     if (QSIMPLEQ_EMPTY(&actions)) {
5168         error_report("Need at least one of --add, --remove, --clear, "
5169                      "--enable, --disable, or --merge");
5170         goto out;
5171     }
5172 
5173     if (granularity && !add) {
5174         error_report("granularity only supported with --add");
5175         goto out;
5176     }
5177     if (src_fmt && !src_filename) {
5178         error_report("-F only supported with -b");
5179         goto out;
5180     }
5181     if (src_filename && !merge) {
5182         error_report("Merge bitmap source file only supported with "
5183                      "--merge");
5184         goto out;
5185     }
5186 
5187     if (optind != argc - 2) {
5188         error_report("Expecting filename and bitmap name");
5189         goto out;
5190     }
5191 
5192     filename = argv[optind];
5193     bitmap = argv[optind + 1];
5194 
5195     /*
5196      * No need to open backing chains; we will be manipulating bitmaps
5197      * directly in this image without reference to image contents.
5198      */
5199     blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR | BDRV_O_NO_BACKING,
5200                    false, false, false);
5201     if (!blk) {
5202         goto out;
5203     }
5204     bs = blk_bs(blk);
5205     if (src_filename) {
5206         src = img_open(false, src_filename, src_fmt, BDRV_O_NO_BACKING,
5207                        false, false, false);
5208         if (!src) {
5209             goto out;
5210         }
5211         src_bs = blk_bs(src);
5212     } else {
5213         src_bs = bs;
5214     }
5215 
5216     QSIMPLEQ_FOREACH_SAFE(act, &actions, next, act_next) {
5217         switch (act->act) {
5218         case BITMAP_ADD:
5219             qmp_block_dirty_bitmap_add(bs->node_name, bitmap,
5220                                        !!granularity, granularity, true, true,
5221                                        false, false, &err);
5222             op = "add";
5223             break;
5224         case BITMAP_REMOVE:
5225             qmp_block_dirty_bitmap_remove(bs->node_name, bitmap, &err);
5226             op = "remove";
5227             break;
5228         case BITMAP_CLEAR:
5229             qmp_block_dirty_bitmap_clear(bs->node_name, bitmap, &err);
5230             op = "clear";
5231             break;
5232         case BITMAP_ENABLE:
5233             qmp_block_dirty_bitmap_enable(bs->node_name, bitmap, &err);
5234             op = "enable";
5235             break;
5236         case BITMAP_DISABLE:
5237             qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err);
5238             op = "disable";
5239             break;
5240         case BITMAP_MERGE:
5241             do_dirty_bitmap_merge(bs->node_name, bitmap, src_bs->node_name,
5242                                   act->src, &err);
5243             op = "merge";
5244             break;
5245         default:
5246             g_assert_not_reached();
5247         }
5248 
5249         if (err) {
5250             error_reportf_err(err, "Operation %s on bitmap %s failed: ",
5251                               op, bitmap);
5252             goto out;
5253         }
5254         g_free(act);
5255     }
5256 
5257     ret = 0;
5258 
5259  out:
5260     /*
5261      * Manually inactivate the images first because this way we can know whether
5262      * an error occurred. blk_unref() doesn't tell us about failures.
5263      */
5264     inactivate_ret = bdrv_inactivate_all();
5265     if (inactivate_ret < 0) {
5266         error_report("Error while closing the image: %s", strerror(-inactivate_ret));
5267         ret = 1;
5268     }
5269 
5270     blk_unref(src);
5271     blk_unref(blk);
5272     qemu_opts_del(opts);
5273     return ret;
5274 }
5275 
/*
 * One bit per dd operand, written as octal literals (1, 2, 4, 8, 16).
 * NOTE(review): presumably combined in DdInfo.flags and matched against
 * DdOpts.flag by img_dd() - confirm against the caller.
 */
#define C_BS      01
#define C_COUNT   02
#define C_IF      04
#define C_OF      010
#define C_SKIP    020
5281 
/* Settings of a dd run that do not belong to either the input or output */
struct DdInfo {
    unsigned int flags; /* presumably a mask of C_* bits - TODO confirm */
    int64_t count;      /* value of the "count" operand */
};
5286 
/* Description of one side (input or output) of a dd transfer */
struct DdIo {
    int bsz;    /* Block size */
    char *filename; /* heap copy of the "if"/"of" operand (g_strdup) */
    uint8_t *buf;
    int64_t offset; /* for the input side: value of the "skip" operand */
};
5293 
/*
 * Descriptor of one dd operand: its name, the handler that parses its
 * value, and an associated flag.
 */
struct DdOpts {
    const char *name;
    /* Handler: (value, input side, output side, info); returns 0 on success */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    unsigned int flag; /* presumably the matching C_* bit - TODO confirm */
};
5299 
5300 static int img_dd_bs(const char *arg,
5301                      struct DdIo *in, struct DdIo *out,
5302                      struct DdInfo *dd)
5303 {
5304     int64_t res;
5305 
5306     res = cvtnum_full("bs", arg, true, 1, INT_MAX);
5307 
5308     if (res < 0) {
5309         return 1;
5310     }
5311     in->bsz = out->bsz = res;
5312 
5313     return 0;
5314 }
5315 
5316 static int img_dd_count(const char *arg,
5317                         struct DdIo *in, struct DdIo *out,
5318                         struct DdInfo *dd)
5319 {
5320     dd->count = cvtnum("count", arg, true);
5321 
5322     if (dd->count < 0) {
5323         return 1;
5324     }
5325 
5326     return 0;
5327 }
5328 
5329 static int img_dd_if(const char *arg,
5330                      struct DdIo *in, struct DdIo *out,
5331                      struct DdInfo *dd)
5332 {
5333     in->filename = g_strdup(arg);
5334 
5335     return 0;
5336 }
5337 
5338 static int img_dd_of(const char *arg,
5339                      struct DdIo *in, struct DdIo *out,
5340                      struct DdInfo *dd)
5341 {
5342     out->filename = g_strdup(arg);
5343 
5344     return 0;
5345 }
5346 
5347 static int img_dd_skip(const char *arg,
5348                        struct DdIo *in, struct DdIo *out,
5349                        struct DdInfo *dd)
5350 {
5351     in->offset = cvtnum("skip", arg, true);
5352 
5353     if (in->offset < 0) {
5354         return 1;
5355     }
5356 
5357     return 0;
5358 }
5359 
5360 static int img_dd(const img_cmd_t *ccmd, int argc, char **argv)
5361 {
5362     int ret = 0;
5363     char *arg = NULL;
5364     char *tmp;
5365     BlockDriver *drv = NULL, *proto_drv = NULL;
5366     BlockBackend *blk1 = NULL, *blk2 = NULL;
5367     QemuOpts *opts = NULL;
5368     QemuOptsList *create_opts = NULL;
5369     Error *local_err = NULL;
5370     bool image_opts = false;
5371     int c, i;
5372     const char *out_fmt = "raw";
5373     const char *fmt = NULL;
5374     int64_t size = 0;
5375     int64_t out_pos, in_pos;
5376     bool force_share = false;
5377     struct DdInfo dd = {
5378         .flags = 0,
5379         .count = 0,
5380     };
5381     struct DdIo in = {
5382         .bsz = 512, /* Block size is by default 512 bytes */
5383         .filename = NULL,
5384         .buf = NULL,
5385         .offset = 0
5386     };
5387     struct DdIo out = {
5388         .bsz = 512,
5389         .filename = NULL,
5390         .buf = NULL,
5391         .offset = 0
5392     };
5393 
5394     const struct DdOpts options[] = {
5395         { "bs", img_dd_bs, C_BS },
5396         { "count", img_dd_count, C_COUNT },
5397         { "if", img_dd_if, C_IF },
5398         { "of", img_dd_of, C_OF },
5399         { "skip", img_dd_skip, C_SKIP },
5400         { NULL, NULL, 0 }
5401     };
5402     const struct option long_options[] = {
5403         { "help", no_argument, 0, 'h'},
5404         { "format", required_argument, 0, 'f'},
5405         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5406         { "output-format", required_argument, 0, 'O'},
5407         { "force-share", no_argument, 0, 'U'},
5408         { "object", required_argument, 0, OPTION_OBJECT},
5409         { 0, 0, 0, 0 }
5410     };
5411 
5412     while ((c = getopt_long(argc, argv, "hf:O:U", long_options, NULL))) {
5413         if (c == EOF) {
5414             break;
5415         }
5416         switch (c) {
5417         case 'h':
5418             cmd_help(ccmd, "[-f FMT|--image-opts] [-O OUTPUT_FMT] [-U]\n"
5419 "        [--object OBJDEF] [bs=BLOCK_SIZE] [count=BLOCKS] if=INPUT of=OUTPUT\n"
5420 ,
5421 "  -f, --format FMT\n"
5422 "     specify format for INPUT explicitly (default: probing is used)\n"
5423 "  --image-opts\n"
5424 "     treat INPUT as an option string (key=value,..), not a file name\n"
5425 "     (incompatible with -f|--format)\n"
5426 "  -O, --output-format OUTPUT_FMT\n"
5427 "     format of the OUTPUT (default: raw)\n"
5428 "  -U, --force-share\n"
5429 "     open images in shared mode for concurrent access\n"
5430 "  --object OBJDEF\n"
5431 "     defines QEMU user-creatable object\n"
5432 "  bs=BLOCK_SIZE[bKMGTP]\n"
5433 "     size of the I/O block, with optional multiplier suffix (powers of 1024)\n"
5434 "     (default: 512)\n"
5435 "  count=COUNT\n"
5436 "     number of blocks to convert (default whole INPUT)\n"
5437 "  if=INPUT\n"
5438 "     name of the file, or option string (key=value,..)\n"
5439 "     with --image-opts, to use for input\n"
5440 "  of=OUTPUT\n"
5441 "     output file name to create (will be overridden if alrady exists)\n"
5442 );
5443             break;
5444         case 'f':
5445             fmt = optarg;
5446             break;
5447         case OPTION_IMAGE_OPTS:
5448             image_opts = true;
5449             break;
5450         case 'O':
5451             out_fmt = optarg;
5452             break;
5453         case 'U':
5454             force_share = true;
5455             break;
5456         case OPTION_OBJECT:
5457             user_creatable_process_cmdline(optarg);
5458             break;
5459         default:
5460             tryhelp(argv[0]);
5461         }
5462     }
5463 
5464     for (i = optind; i < argc; i++) {
5465         int j;
5466         arg = g_strdup(argv[i]);
5467 
5468         tmp = strchr(arg, '=');
5469         if (tmp == NULL) {
5470             error_report("unrecognized operand %s", arg);
5471             ret = -1;
5472             goto out;
5473         }
5474 
5475         *tmp++ = '\0';
5476 
5477         for (j = 0; options[j].name != NULL; j++) {
5478             if (!strcmp(arg, options[j].name)) {
5479                 break;
5480             }
5481         }
5482         if (options[j].name == NULL) {
5483             error_report("unrecognized operand %s", arg);
5484             ret = -1;
5485             goto out;
5486         }
5487 
5488         if (options[j].f(tmp, &in, &out, &dd) != 0) {
5489             ret = -1;
5490             goto out;
5491         }
5492         dd.flags |= options[j].flag;
5493         g_free(arg);
5494         arg = NULL;
5495     }
5496 
5497     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
5498         error_report("Must specify both input and output files");
5499         ret = -1;
5500         goto out;
5501     }
5502 
5503     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
5504                     force_share);
5505 
5506     if (!blk1) {
5507         ret = -1;
5508         goto out;
5509     }
5510 
5511     drv = bdrv_find_format(out_fmt);
5512     if (!drv) {
5513         error_report("Unknown file format");
5514         ret = -1;
5515         goto out;
5516     }
5517     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
5518 
5519     if (!proto_drv) {
5520         error_report_err(local_err);
5521         ret = -1;
5522         goto out;
5523     }
5524     if (!drv->create_opts) {
5525         error_report("Format driver '%s' does not support image creation",
5526                      drv->format_name);
5527         ret = -1;
5528         goto out;
5529     }
5530     if (!proto_drv->create_opts) {
5531         error_report("Protocol driver '%s' does not support image creation",
5532                      proto_drv->format_name);
5533         ret = -1;
5534         goto out;
5535     }
5536     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5537     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
5538 
5539     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5540 
5541     size = blk_getlength(blk1);
5542     if (size < 0) {
5543         error_report("Failed to get size for '%s'", in.filename);
5544         ret = -1;
5545         goto out;
5546     }
5547 
5548     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
5549         dd.count * in.bsz < size) {
5550         size = dd.count * in.bsz;
5551     }
5552 
5553     /* Overflow means the specified offset is beyond input image's size */
5554     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5555                               size < in.bsz * in.offset)) {
5556         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
5557     } else {
5558         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
5559                             size - in.bsz * in.offset, &error_abort);
5560     }
5561 
5562     ret = bdrv_create(drv, out.filename, opts, &local_err);
5563     if (ret < 0) {
5564         error_reportf_err(local_err,
5565                           "%s: error while creating output image: ",
5566                           out.filename);
5567         ret = -1;
5568         goto out;
5569     }
5570 
5571     /* TODO, we can't honour --image-opts for the target,
5572      * since it needs to be given in a format compatible
5573      * with the bdrv_create() call above which does not
5574      * support image-opts style.
5575      */
5576     blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
5577                          false, false, false);
5578 
5579     if (!blk2) {
5580         ret = -1;
5581         goto out;
5582     }
5583 
5584     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5585                               size < in.offset * in.bsz)) {
5586         /* We give a warning if the skip option is bigger than the input
5587          * size and create an empty output disk image (i.e. like dd(1)).
5588          */
5589         error_report("%s: cannot skip to specified offset", in.filename);
5590         in_pos = size;
5591     } else {
5592         in_pos = in.offset * in.bsz;
5593     }
5594 
5595     in.buf = g_new(uint8_t, in.bsz);
5596 
5597     for (out_pos = 0; in_pos < size; ) {
5598         int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
5599 
5600         ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
5601         if (ret < 0) {
5602             error_report("error while reading from input image file: %s",
5603                          strerror(-ret));
5604             goto out;
5605         }
5606         in_pos += bytes;
5607 
5608         ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
5609         if (ret < 0) {
5610             error_report("error while writing to output image file: %s",
5611                          strerror(-ret));
5612             goto out;
5613         }
5614         out_pos += bytes;
5615     }
5616 
5617 out:
5618     g_free(arg);
5619     qemu_opts_del(opts);
5620     qemu_opts_free(create_opts);
5621     blk_unref(blk1);
5622     blk_unref(blk2);
5623     g_free(in.filename);
5624     g_free(out.filename);
5625     g_free(in.buf);
5626     g_free(out.buf);
5627 
5628     if (ret) {
5629         return 1;
5630     }
5631     return 0;
5632 }
5633 
5634 static void dump_json_block_measure_info(BlockMeasureInfo *info)
5635 {
5636     GString *str;
5637     QObject *obj;
5638     Visitor *v = qobject_output_visitor_new(&obj);
5639 
5640     visit_type_BlockMeasureInfo(v, NULL, &info, &error_abort);
5641     visit_complete(v, &obj);
5642     str = qobject_to_json_pretty(obj, true);
5643     assert(str != NULL);
5644     printf("%s\n", str->str);
5645     qobject_unref(obj);
5646     visit_free(v);
5647     g_string_free(str, true);
5648 }
5649 
5650 static int img_measure(const img_cmd_t *ccmd, int argc, char **argv)
5651 {
5652     OutputFormat output_format = OFORMAT_HUMAN;
5653     BlockBackend *in_blk = NULL;
5654     BlockDriver *drv;
5655     const char *filename = NULL;
5656     const char *fmt = NULL;
5657     const char *out_fmt = "raw";
5658     char *options = NULL;
5659     char *snapshot_name = NULL;
5660     bool force_share = false;
5661     QemuOpts *opts = NULL;
5662     QemuOpts *object_opts = NULL;
5663     QemuOpts *sn_opts = NULL;
5664     QemuOptsList *create_opts = NULL;
5665     bool image_opts = false;
5666     int64_t img_size = -1;
5667     BlockMeasureInfo *info = NULL;
5668     Error *local_err = NULL;
5669     int ret = 1;
5670     int c;
5671 
5672     static const struct option long_options[] = {
5673         {"help", no_argument, 0, 'h'},
5674         {"source-format", required_argument, 0, 'f'}, /* img_convert */
5675         {"format", required_argument, 0, 'f'},
5676         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5677         {"source-image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, /* img_convert */
5678         {"snapshot", required_argument, 0, 'l'},
5679         {"target-format", required_argument, 0, 'O'},
5680         {"target-format-options", required_argument, 0, 'o'}, /* img_convert */
5681         {"options", required_argument, 0, 'o'},
5682         {"force-share", no_argument, 0, 'U'},
5683         {"output", required_argument, 0, OPTION_OUTPUT},
5684         {"object", required_argument, 0, OPTION_OBJECT},
5685         {"size", required_argument, 0, 's'},
5686         {0, 0, 0, 0}
5687     };
5688 
5689     while ((c = getopt_long(argc, argv, "hf:l:O:o:Us:",
5690                             long_options, NULL)) != -1) {
5691         switch (c) {
5692         case 'h':
5693             cmd_help(ccmd, "[-f FMT|--image-opts] [-l SNAPSHOT]\n"
5694 "       [-O TARGET_FMT] [-o TARGET_FMT_OPTS] [--output human|json]\n"
5695 "       [--object OBJDEF] (--size SIZE | FILE)\n"
5696 ,
5697 "  -f, --format\n"
5698 "     specify format of FILE explicitly (default: probing is used)\n"
5699 "  --image-opts\n"
5700 "     indicates that FILE is a complete image specification\n"
5701 "     instead of a file name (incompatible with --format)\n"
5702 "  -l, --snapshot SNAPSHOT\n"
5703 "     use this snapshot in FILE as source\n"
5704 "  -O, --target-format TARGET_FMT\n"
5705 "     desired target/output image format (default: raw)\n"
5706 "  -o TARGET_FMT_OPTS\n"
5707 "     options specific to TARGET_FMT\n"
5708 "  --output human|json\n"
5709 "     output format (default: human)\n"
5710 "  -U, --force-share\n"
5711 "     open images in shared mode for concurrent access\n"
5712 "  --object OBJDEF\n"
5713 "     defines QEMU user-creatable object\n"
5714 "  -s, --size SIZE[bKMGTPE]\n"
5715 "     measure file size for given image size,\n"
5716 "     with optional multiplier suffix (powers of 1024)\n"
5717 "  FILE\n"
5718 "     measure file size required to convert from FILE (either a file name\n"
5719 "     or an option string (key=value,..) with --image-options)\n"
5720 );
5721             break;
5722         case 'f':
5723             fmt = optarg;
5724             break;
5725         case OPTION_IMAGE_OPTS:
5726             image_opts = true;
5727             break;
5728         case 'l':
5729             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
5730                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
5731                                                   optarg, false);
5732                 if (!sn_opts) {
5733                     error_report("Failed in parsing snapshot param '%s'",
5734                                  optarg);
5735                     goto out;
5736                 }
5737             } else {
5738                 snapshot_name = optarg;
5739             }
5740             break;
5741         case 'O':
5742             out_fmt = optarg;
5743             break;
5744         case 'o':
5745             if (accumulate_options(&options, optarg) < 0) {
5746                 goto out;
5747             }
5748             break;
5749         case 'U':
5750             force_share = true;
5751             break;
5752         case OPTION_OUTPUT:
5753             output_format = parse_output_format(argv[0], optarg);
5754             break;
5755         case OPTION_OBJECT:
5756             user_creatable_process_cmdline(optarg);
5757             break;
5758         case 's':
5759             img_size = cvtnum("image size", optarg, true);
5760             if (img_size < 0) {
5761                 goto out;
5762             }
5763             break;
5764         default:
5765             tryhelp(argv[0]);
5766         }
5767     }
5768 
5769     if (argc - optind > 1) {
5770         error_report("At most one filename argument is allowed.");
5771         goto out;
5772     } else if (argc - optind == 1) {
5773         filename = argv[optind];
5774     }
5775 
5776     if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
5777         error_report("--image-opts, -f, and -l require a filename argument.");
5778         goto out;
5779     }
5780     if (filename && img_size != -1) {
5781         error_report("--size N cannot be used together with a filename.");
5782         goto out;
5783     }
5784     if (!filename && img_size == -1) {
5785         error_report("Either --size N or one filename must be specified.");
5786         goto out;
5787     }
5788 
5789     if (filename) {
5790         in_blk = img_open(image_opts, filename, fmt, 0,
5791                           false, false, force_share);
5792         if (!in_blk) {
5793             goto out;
5794         }
5795 
5796         if (sn_opts) {
5797             bdrv_snapshot_load_tmp(blk_bs(in_blk),
5798                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
5799                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
5800                     &local_err);
5801         } else if (snapshot_name != NULL) {
5802             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
5803                     snapshot_name, &local_err);
5804         }
5805         if (local_err) {
5806             error_reportf_err(local_err, "Failed to load snapshot: ");
5807             goto out;
5808         }
5809     }
5810 
5811     drv = bdrv_find_format(out_fmt);
5812     if (!drv) {
5813         error_report("Unknown file format '%s'", out_fmt);
5814         goto out;
5815     }
5816     if (!drv->create_opts) {
5817         error_report("Format driver '%s' does not support image creation",
5818                      drv->format_name);
5819         goto out;
5820     }
5821 
5822     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5823     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
5824     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5825     if (options) {
5826         if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
5827             error_report_err(local_err);
5828             error_report("Invalid options for file format '%s'", out_fmt);
5829             goto out;
5830         }
5831     }
5832     if (img_size != -1) {
5833         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5834     }
5835 
5836     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
5837     if (local_err) {
5838         error_report_err(local_err);
5839         goto out;
5840     }
5841 
5842     if (output_format == OFORMAT_HUMAN) {
5843         printf("required size: %" PRIu64 "\n", info->required);
5844         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5845         if (info->has_bitmaps) {
5846             printf("bitmaps size: %" PRIu64 "\n", info->bitmaps);
5847         }
5848     } else {
5849         dump_json_block_measure_info(info);
5850     }
5851 
5852     ret = 0;
5853 
5854 out:
5855     qapi_free_BlockMeasureInfo(info);
5856     qemu_opts_del(object_opts);
5857     qemu_opts_del(opts);
5858     qemu_opts_del(sn_opts);
5859     qemu_opts_free(create_opts);
5860     g_free(options);
5861     blk_unref(in_blk);
5862     return ret;
5863 }
5864 
5865 static const img_cmd_t img_cmds[] = {
5866     { "amend", img_amend,
5867       "Update format-specific options of the image" },
5868     { "bench", img_bench,
5869       "Run a simple image benchmark" },
5870     { "bitmap", img_bitmap,
5871       "Perform modifications of the persistent bitmap in the image" },
5872     { "check", img_check,
5873       "Check basic image integrity" },
5874     { "commit", img_commit,
5875       "Commit image to its backing file" },
5876     { "compare", img_compare,
5877       "Check if two images have the same contents" },
5878     { "convert", img_convert,
5879       "Copy one or more images to another with optional format conversion" },
5880     { "create", img_create,
5881       "Create and format a new image file" },
5882     { "dd", img_dd,
5883       "Copy input to output with optional format conversion" },
5884     { "info", img_info,
5885       "Display information about the image" },
5886     { "map", img_map,
5887       "Dump image metadata" },
5888     { "measure", img_measure,
5889       "Calculate the file size required for a new image" },
5890     { "rebase", img_rebase,
5891       "Change the backing file of the image" },
5892     { "resize", img_resize,
5893       "Resize the image" },
5894     { "snapshot", img_snapshot,
5895       "List or manipulate snapshots in the image" },
5896     { NULL, NULL, },
5897 };
5898 
/*
 * Print @name preceded by a space and keep track of the output column.
 *
 * @opaque points to an int holding the current column. When appending @name
 * would take the column past 75, a newline plus one space of indentation is
 * printed first and the column restarts at 1. The column is then advanced by
 * the number of characters printf() reports for " <name>".
 */
static void format_print(void *opaque, const char *name)
{
    int *column = opaque;

    if (*column + strlen(name) > 75) {
        fputs("\n ", stdout);
        *column = 1;
    }

    *column += printf(" %s", name);
}
5908 
5909 int main(int argc, char **argv)
5910 {
5911     const img_cmd_t *cmd;
5912     const char *cmdname;
5913     int c;
5914     static const struct option long_options[] = {
5915         {"help", no_argument, 0, 'h'},
5916         {"version", no_argument, 0, 'V'},
5917         {"trace", required_argument, NULL, 'T'},
5918         {0, 0, 0, 0}
5919     };
5920 
5921 #ifdef CONFIG_POSIX
5922     signal(SIGPIPE, SIG_IGN);
5923 #endif
5924 
5925     socket_init();
5926     error_init(argv[0]);
5927     module_call_init(MODULE_INIT_TRACE);
5928     qemu_init_exec_dir(argv[0]);
5929 
5930     qemu_init_main_loop(&error_fatal);
5931 
5932     qcrypto_init(&error_fatal);
5933 
5934     module_call_init(MODULE_INIT_QOM);
5935     bdrv_init();
5936 
5937     qemu_add_opts(&qemu_source_opts);
5938     qemu_add_opts(&qemu_trace_opts);
5939 
5940     while ((c = getopt_long(argc, argv, "+hVT:", long_options, NULL)) != -1) {
5941         switch (c) {
5942         case 'h':
5943             printf(
5944 QEMU_IMG_VERSION
5945 "QEMU disk image utility.  Usage:\n"
5946 "\n"
5947 "  qemu-img [standard options] COMMAND [--help | command options]\n"
5948 "\n"
5949 "Standard options:\n"
5950 "  -h, --help\n"
5951 "     display this help and exit\n"
5952 "  -V, --version\n"
5953 "     display version info and exit\n"
5954 "  -T,--trace TRACE\n"
5955 "     specify tracing options:\n"
5956 "        [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
5957 "\n"
5958 "Recognized commands (run qemu-img COMMAND --help for command-specific help):\n\n");
5959             for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5960                 printf("  %s - %s\n", cmd->name, cmd->description);
5961             }
5962             printf("\nSupported image formats:\n");
5963             c = 99; /* force a newline */
5964             bdrv_iterate_format(format_print, &c, false);
5965             if (c) {
5966                 printf("\n");
5967             }
5968             printf("\n" QEMU_HELP_BOTTOM "\n");
5969             return 0;
5970         case 'V':
5971             printf(QEMU_IMG_VERSION);
5972             return 0;
5973         case 'T':
5974             trace_opt_parse(optarg);
5975             break;
5976         default:
5977             tryhelp(argv[0]);
5978         }
5979     }
5980 
5981     if (optind >= argc) {
5982         error_exit(argv[0], "Not enough arguments");
5983     }
5984 
5985     cmdname = argv[optind];
5986 
5987     if (!trace_init_backends()) {
5988         exit(1);
5989     }
5990     trace_init_file();
5991     qemu_set_log(LOG_TRACE, &error_fatal);
5992 
5993     /* find the command */
5994     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5995         if (!strcmp(cmdname, cmd->name)) {
5996             g_autofree char *argv0 = g_strdup_printf("%s %s", argv[0], cmdname);
5997             /* reset options and getopt processing (incl return order) */
5998             argv += optind;
5999             argc -= optind;
6000             qemu_reset_optind();
6001             argv[0] = argv0;
6002             return cmd->handler(cmd, argc, argv);
6003         }
6004     }
6005 
6006     /* not found */
6007     error_exit(argv[0], "Command not found: %s", cmdname);
6008 }
6009