xref: /openbmc/qemu/qemu-img.c (revision 68ff2eeb299d562e437b49e9bb98f9d6f62fbf06)
1 /*
2  * QEMU disk image utility
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 #include <getopt.h>
27 
28 #include "qemu/help-texts.h"
29 #include "qemu/qemu-progress.h"
30 #include "qemu-version.h"
31 #include "qapi/error.h"
32 #include "qapi/qapi-commands-block-core.h"
33 #include "qapi/qapi-visit-block-core.h"
34 #include "qapi/qobject-output-visitor.h"
35 #include "qobject/qjson.h"
36 #include "qobject/qdict.h"
37 #include "qemu/cutils.h"
38 #include "qemu/config-file.h"
39 #include "qemu/option.h"
40 #include "qemu/error-report.h"
41 #include "qemu/log.h"
42 #include "qemu/main-loop.h"
43 #include "qemu/module.h"
44 #include "qemu/sockets.h"
45 #include "qemu/units.h"
46 #include "qemu/memalign.h"
47 #include "qom/object_interfaces.h"
48 #include "system/block-backend.h"
49 #include "block/block_int.h"
50 #include "block/blockjob.h"
51 #include "block/dirty-bitmap.h"
52 #include "block/qapi.h"
53 #include "crypto/init.h"
54 #include "trace/control.h"
55 #include "qemu/throttle.h"
56 #include "block/throttle-groups.h"
57 
/*
 * Version banner: "qemu-img version <full version>" followed by the
 * copyright line, each terminated by a newline.
 */
#define QEMU_IMG_VERSION "qemu-img version " QEMU_FULL_VERSION \
                          "\n" QEMU_COPYRIGHT "\n"
60 
/*
 * Descriptor of one qemu-img command: its name, the function that
 * implements it, and a short description.  cmd_help() prints the name
 * and the description as part of the usage text.
 */
typedef struct img_cmd_t img_cmd_t;

struct img_cmd_t {
    /* command name (printed after "qemu-img" in the usage line) */
    const char *name;
    /* implementation; receives this descriptor and the command's argc/argv */
    int (*handler)(const img_cmd_t *ccmd, int argc, char **argv);
    /* short description (printed followed by a '.' in the usage text) */
    const char *description;
};
66 
/*
 * getopt_long() return codes for options that only have a long form.
 * The values start at 256, above every single-character option code,
 * so they cannot collide with the option letters handled in the
 * per-command switch statements.  Note that 264 is not assigned.
 */
enum {
    OPTION_OUTPUT = 256,
    OPTION_BACKING_CHAIN = 257,
    OPTION_OBJECT = 258,
    OPTION_IMAGE_OPTS = 259,
    OPTION_PATTERN = 260,
    OPTION_FLUSH_INTERVAL = 261,
    OPTION_NO_DRAIN = 262,
    OPTION_TARGET_IMAGE_OPTS = 263,
    OPTION_PREALLOCATION = 265,
    OPTION_SHRINK = 266,
    OPTION_SALVAGE = 267,
    OPTION_TARGET_IS_ZERO = 268,
    OPTION_ADD = 269,
    OPTION_REMOVE = 270,
    OPTION_CLEAR = 271,
    OPTION_ENABLE = 272,
    OPTION_DISABLE = 273,
    OPTION_MERGE = 274,
    OPTION_BITMAPS = 275,
    OPTION_FORCE = 276,
    OPTION_SKIP_BROKEN = 277,
};
90 
/*
 * Output style selected with --output; parse_output_format() maps the
 * strings "json" and "human" to these values.
 */
typedef enum OutputFormat {
    OFORMAT_JSON,
    OFORMAT_HUMAN,
} OutputFormat;
95 
/*
 * Default to cache=writeback as data integrity is not important for qemu-img.
 * Commands start from this value and let the user override it with
 * their cache option (e.g. -T in img_check, -t in img_commit).
 */
#define BDRV_DEFAULT_CACHE "writeback"
98 
99 static G_NORETURN
tryhelp(const char * argv0)100 void tryhelp(const char *argv0)
101 {
102     error_printf("Try '%s --help' for more information\n", argv0);
103     exit(EXIT_FAILURE);
104 }
105 
106 static G_NORETURN G_GNUC_PRINTF(2, 3)
error_exit(const char * argv0,const char * fmt,...)107 void error_exit(const char *argv0, const char *fmt, ...)
108 {
109     va_list ap;
110 
111     va_start(ap, fmt);
112     error_vreport(fmt, ap);
113     va_end(ap);
114 
115     tryhelp(argv0);
116 }
117 
118 /*
119  * Print --help output for a command and exit.
120  * @syntax and @description are multi-line with trailing EOL
121  * (to allow easy extending of the text)
122  * @syntax has each subsequent line indented by 8 chars.
123  * @description is indented by 2 chars for argument on each own line,
124  * and with 5 chars for argument description (like -h arg below).
125  */
126 static G_NORETURN
cmd_help(const img_cmd_t * ccmd,const char * syntax,const char * arguments)127 void cmd_help(const img_cmd_t *ccmd,
128               const char *syntax, const char *arguments)
129 {
130     printf(
131 "Usage:\n"
132 "  %s %s %s\n"
133 "%s.\n"
134 "\n"
135 "Arguments:\n"
136 "  -h, --help\n"
137 "     print this help and exit\n"
138 "%s\n",
139            "qemu-img", ccmd->name, syntax, ccmd->description, arguments);
140     exit(EXIT_SUCCESS);
141 }
142 
parse_output_format(const char * argv0,const char * arg)143 static OutputFormat parse_output_format(const char *argv0, const char *arg)
144 {
145     if (!strcmp(arg, "json")) {
146         return OFORMAT_JSON;
147     } else if (!strcmp(arg, "human")) {
148         return OFORMAT_HUMAN;
149     } else {
150         error_exit(argv0, "--output expects 'human' or 'json', not '%s'", arg);
151     }
152 }
153 
/*
 * Can @list be handed to accumulate_options()?
 *
 * accumulate_options() glues several lists together with a ','
 * separator, so each list has to keep that separator intact:
 *  - it must not be empty (otherwise the separator before it could
 *    escape the separator after it);
 *  - it must not begin with ',' (otherwise the separator before it
 *    would be escaped);
 *  - it must not end in an odd number of ',' (otherwise the separator
 *    after it would be escaped).
 */
static bool is_valid_option_list(const char *list)
{
    size_t pos = strlen(list);
    size_t trailing_commas = 0;

    if (list[0] == '\0' || list[0] == ',') {
        return false;
    }

    /* Walk backwards over the run of ',' at the end of the string. */
    while (pos > 0 && list[pos - 1] == ',') {
        pos--;
        trailing_commas++;
    }

    return (trailing_commas % 2) == 0;
}
181 
accumulate_options(char ** options,char * list)182 static int accumulate_options(char **options, char *list)
183 {
184     char *new_options;
185 
186     if (!is_valid_option_list(list)) {
187         error_report("Invalid option list: %s", list);
188         return -1;
189     }
190 
191     if (!*options) {
192         *options = g_strdup(list);
193     } else {
194         new_options = g_strdup_printf("%s,%s", *options, list);
195         g_free(*options);
196         *options = new_options;
197     }
198     return 0;
199 }
200 
/*
 * Option list named "source".  img_open() looks up a list of that name
 * with qemu_find_opts("source") to parse --image-opts strings.
 * A value given without a key is assigned to "file" (implied_opt_name).
 * The descriptor table is intentionally left empty.
 * NOTE(review): presumably an empty table makes the list accept any
 * key - confirm against qemu/option.h.
 */
static QemuOptsList qemu_source_opts = {
    .name = "source",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_source_opts.head),
    .desc = {
        { }
    },
};
209 
qprintf(bool quiet,const char * fmt,...)210 static int G_GNUC_PRINTF(2, 3) qprintf(bool quiet, const char *fmt, ...)
211 {
212     int ret = 0;
213     if (!quiet) {
214         va_list args;
215         va_start(args, fmt);
216         ret = vprintf(fmt, args);
217         va_end(args);
218     }
219     return ret;
220 }
221 
222 
print_block_option_help(const char * filename,const char * fmt)223 static int print_block_option_help(const char *filename, const char *fmt)
224 {
225     BlockDriver *drv, *proto_drv;
226     QemuOptsList *create_opts = NULL;
227     Error *local_err = NULL;
228 
229     /* Find driver and parse its options */
230     drv = bdrv_find_format(fmt);
231     if (!drv) {
232         error_report("Unknown file format '%s'", fmt);
233         return 1;
234     }
235 
236     if (!drv->create_opts) {
237         error_report("Format driver '%s' does not support image creation", fmt);
238         return 1;
239     }
240 
241     create_opts = qemu_opts_append(create_opts, drv->create_opts);
242     if (filename) {
243         proto_drv = bdrv_find_protocol(filename, true, &local_err);
244         if (!proto_drv) {
245             error_report_err(local_err);
246             qemu_opts_free(create_opts);
247             return 1;
248         }
249         if (!proto_drv->create_opts) {
250             error_report("Protocol driver '%s' does not support image creation",
251                          proto_drv->format_name);
252             qemu_opts_free(create_opts);
253             return 1;
254         }
255         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
256     }
257 
258     if (filename) {
259         printf("Supported options:\n");
260     } else {
261         printf("Supported %s options:\n", fmt);
262     }
263     qemu_opts_print_help(create_opts, false);
264     qemu_opts_free(create_opts);
265 
266     if (!filename) {
267         printf("\n"
268                "The protocol level may support further options.\n"
269                "Specify the target filename to include those options.\n");
270     }
271 
272     return 0;
273 }
274 
275 
img_open_opts(const char * optstr,QemuOpts * opts,int flags,bool writethrough,bool quiet,bool force_share)276 static BlockBackend *img_open_opts(const char *optstr,
277                                    QemuOpts *opts, int flags, bool writethrough,
278                                    bool quiet, bool force_share)
279 {
280     QDict *options;
281     Error *local_err = NULL;
282     BlockBackend *blk;
283     options = qemu_opts_to_qdict(opts, NULL);
284     if (force_share) {
285         if (qdict_haskey(options, BDRV_OPT_FORCE_SHARE)
286             && strcmp(qdict_get_str(options, BDRV_OPT_FORCE_SHARE), "on")) {
287             error_report("--force-share/-U conflicts with image options");
288             qobject_unref(options);
289             return NULL;
290         }
291         qdict_put_str(options, BDRV_OPT_FORCE_SHARE, "on");
292     }
293     blk = blk_new_open(NULL, NULL, options, flags, &local_err);
294     if (!blk) {
295         error_reportf_err(local_err, "Could not open '%s': ", optstr);
296         return NULL;
297     }
298     blk_set_enable_write_cache(blk, !writethrough);
299 
300     return blk;
301 }
302 
img_open_file(const char * filename,QDict * options,const char * fmt,int flags,bool writethrough,bool quiet,bool force_share)303 static BlockBackend *img_open_file(const char *filename,
304                                    QDict *options,
305                                    const char *fmt, int flags,
306                                    bool writethrough, bool quiet,
307                                    bool force_share)
308 {
309     BlockBackend *blk;
310     Error *local_err = NULL;
311 
312     if (!options) {
313         options = qdict_new();
314     }
315     if (fmt) {
316         qdict_put_str(options, "driver", fmt);
317     }
318 
319     if (force_share) {
320         qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
321     }
322     blk = blk_new_open(filename, NULL, options, flags, &local_err);
323     if (!blk) {
324         error_reportf_err(local_err, "Could not open '%s': ", filename);
325         return NULL;
326     }
327     blk_set_enable_write_cache(blk, !writethrough);
328 
329     return blk;
330 }
331 
332 
/*
 * Per-option callback: copy the option @name=@value into the QDict
 * passed as @opaque when @name ends in "key-secret"; every other
 * option is ignored.  Always returns 0 and never sets @errp.
 */
static int img_add_key_secrets(void *opaque,
                               const char *name, const char *value,
                               Error **errp)
{
    QDict *secrets = opaque;

    if (!g_str_has_suffix(name, "key-secret")) {
        return 0;
    }

    qdict_put_str(secrets, name, value);
    return 0;
}
345 
346 
img_open(bool image_opts,const char * filename,const char * fmt,int flags,bool writethrough,bool quiet,bool force_share)347 static BlockBackend *img_open(bool image_opts,
348                               const char *filename,
349                               const char *fmt, int flags, bool writethrough,
350                               bool quiet, bool force_share)
351 {
352     BlockBackend *blk;
353     if (image_opts) {
354         QemuOpts *opts;
355         if (fmt) {
356             error_report("--image-opts and --format are mutually exclusive");
357             return NULL;
358         }
359         opts = qemu_opts_parse_noisily(qemu_find_opts("source"),
360                                        filename, true);
361         if (!opts) {
362             return NULL;
363         }
364         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
365                             force_share);
366     } else {
367         blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
368                             force_share);
369     }
370 
371     if (blk) {
372         blk_set_force_allow_inactivate(blk);
373     }
374 
375     return blk;
376 }
377 
378 
/*
 * Store the backing file name and/or backing format given through
 * dedicated command line switches into @opts.  NULL arguments are
 * skipped.  @fmt is the image format and only appears in the error
 * messages.
 *
 * Returns 0 on success, -1 (after reporting an error) when
 * qemu_opt_set() rejects one of the options.
 */
static int add_old_style_options(const char *fmt, QemuOpts *opts,
                                 const char *base_filename,
                                 const char *base_fmt)
{
    if (base_filename != NULL &&
        !qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, NULL)) {
        error_report("Backing file not supported for file format '%s'",
                     fmt);
        return -1;
    }

    if (base_fmt != NULL &&
        !qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
        error_report("Backing file format not supported for file "
                     "format '%s'", fmt);
        return -1;
    }

    return 0;
}
400 
cvtnum_full(const char * name,const char * value,bool is_size,int64_t min,int64_t max)401 static int64_t cvtnum_full(const char *name, const char *value,
402                            bool is_size, int64_t min, int64_t max)
403 {
404     int err;
405     uint64_t res;
406 
407     err = is_size ? qemu_strtosz(value, NULL, &res) :
408                     qemu_strtou64(value, NULL, 0, &res);
409     if (err < 0 && err != -ERANGE) {
410         error_report("Invalid %s specified: '%s'", name, value);
411         return err;
412     }
413     if (err == -ERANGE || res > max || res < min) {
414         error_report("Invalid %s specified. Must be between %" PRId64
415                      " and %" PRId64 ".", name, min, max);
416         return -ERANGE;
417     }
418     return res;
419 }
420 
/*
 * Shorthand for cvtnum_full() that accepts any value from 0 up to
 * INT64_MAX.  Returns the number, or a negative errno value after an
 * error has been reported.
 */
static int64_t cvtnum(const char *name, const char *value, bool is_size)
{
    const int64_t lowest = 0;
    const int64_t highest = INT64_MAX;

    return cvtnum_full(name, value, is_size, lowest, highest);
}
425 
img_create(const img_cmd_t * ccmd,int argc,char ** argv)426 static int img_create(const img_cmd_t *ccmd, int argc, char **argv)
427 {
428     int c;
429     int64_t img_size = -1;
430     const char *fmt = "raw";
431     const char *base_fmt = NULL;
432     const char *filename;
433     const char *base_filename = NULL;
434     char *options = NULL;
435     Error *local_err = NULL;
436     bool quiet = false;
437     int flags = 0;
438 
439     for(;;) {
440         static const struct option long_options[] = {
441             {"help", no_argument, 0, 'h'},
442             {"format", required_argument, 0, 'f'},
443             {"options", required_argument, 0, 'o'},
444             {"backing", required_argument, 0, 'b'},
445             {"backing-format", required_argument, 0, 'B'}, /* was -F in 10.0 */
446             {"backing-unsafe", no_argument, 0, 'u'},
447             {"quiet", no_argument, 0, 'q'},
448             {"object", required_argument, 0, OPTION_OBJECT},
449             {0, 0, 0, 0}
450         };
451         c = getopt_long(argc, argv, "hf:o:b:F:B:uq",
452                         long_options, NULL);
453         if (c == -1) {
454             break;
455         }
456         switch(c) {
457         case 'h':
458             cmd_help(ccmd, "[-f FMT] [-o FMT_OPTS]\n"
459 "        [-b BACKING_FILE [-B BACKING_FMT]] [-u]\n"
460 "        [-q] [--object OBJDEF] FILE [SIZE]\n"
461 ,
462 "  -f, --format FMT\n"
463 "     specifies the format of the new image (default: raw)\n"
464 "  -o, --options FMT_OPTS\n"
465 "     format-specific options (specify '-o help' for help)\n"
466 "  -b, --backing BACKING_FILE\n"
467 "     create target image to be a CoW on top of BACKING_FILE\n"
468 "  -B, --backing-format BACKING_FMT (was -F in <= 10.0)\n"
469 "     specifies the format of BACKING_FILE (default: probing is used)\n"
470 "  -u, --backing-unsafe\n"
471 "     do not fail if BACKING_FILE can not be read\n"
472 "  -q, --quiet\n"
473 "     quiet mode (produce only error messages if any)\n"
474 "  --object OBJDEF\n"
475 "     defines QEMU user-creatable object\n"
476 "  FILE\n"
477 "     name of the image file to create (will be overritten if already exists)\n"
478 "  SIZE[bKMGTPE]\n"
479 "     image size with optional multiplier suffix (powers of 1024)\n"
480 "     (required unless BACKING_FILE is specified)\n"
481 );
482             break;
483         case 'f':
484             fmt = optarg;
485             break;
486         case 'o':
487             if (accumulate_options(&options, optarg) < 0) {
488                 goto fail;
489             }
490             break;
491         case 'b':
492             base_filename = optarg;
493             break;
494         case 'F': /* <=10.0 */
495         case 'B':
496             base_fmt = optarg;
497             break;
498         case 'u':
499             flags |= BDRV_O_NO_BACKING;
500             break;
501         case 'q':
502             quiet = true;
503             break;
504         case OPTION_OBJECT:
505             user_creatable_process_cmdline(optarg);
506             break;
507         default:
508             tryhelp(argv[0]);
509         }
510     }
511 
512     /* Get the filename */
513     filename = (optind < argc) ? argv[optind] : NULL;
514     if (options && has_help_option(options)) {
515         g_free(options);
516         return print_block_option_help(filename, fmt);
517     }
518 
519     if (optind >= argc) {
520         error_exit(argv[0], "Expecting image file name");
521     }
522     optind++;
523 
524     /* Get image size, if specified */
525     if (optind < argc) {
526         img_size = cvtnum("image size", argv[optind++], true);
527         if (img_size < 0) {
528             goto fail;
529         }
530     }
531     if (optind != argc) {
532         error_exit(argv[0], "Unexpected argument: %s", argv[optind]);
533     }
534 
535     bdrv_img_create(filename, fmt, base_filename, base_fmt,
536                     options, img_size, flags, quiet, &local_err);
537     if (local_err) {
538         error_reportf_err(local_err, "%s: ", filename);
539         goto fail;
540     }
541 
542     g_free(options);
543     return 0;
544 
545 fail:
546     g_free(options);
547     return 1;
548 }
549 
/*
 * Print @check as pretty-printed JSON followed by a newline, unless
 * @quiet is set.  The structure is first converted to a QObject with
 * the QObject output visitor and then serialized.
 */
static void dump_json_image_check(ImageCheck *check, bool quiet)
{
    QObject *qobj;
    Visitor *visitor = qobject_output_visitor_new(&qobj);
    GString *json;

    visit_type_ImageCheck(visitor, NULL, &check, &error_abort);
    visit_complete(visitor, &qobj);

    json = qobject_to_json_pretty(qobj, true);
    assert(json != NULL);
    qprintf(quiet, "%s\n", json->str);
    g_string_free(json, true);

    qobject_unref(qobj);
    visit_free(visitor);
}
565 
/*
 * Print @check in human-readable form, unless @quiet is set:
 *  - either a "no errors" line, or one paragraph per non-zero problem
 *    counter (corruptions, leaks, internal check errors);
 *  - allocation/fragmentation/compression percentages, when both the
 *    total and the allocated cluster counts are non-zero;
 *  - the image end offset, when it is non-zero.
 */
static void dump_human_image_check(ImageCheck *check, bool quiet)
{
    bool clean = !check->corruptions && !check->leaks && !check->check_errors;

    if (clean) {
        qprintf(quiet, "No errors were found on the image.\n");
    }

    /* None of the following three can trigger when the image is clean. */
    if (check->corruptions) {
        qprintf(quiet, "\n%" PRId64 " errors were found on the image.\n"
                "Data may be corrupted, or further writes to the image "
                "may corrupt it.\n",
                check->corruptions);
    }
    if (check->leaks) {
        qprintf(quiet,
                "\n%" PRId64 " leaked clusters were found on the image.\n"
                "This means waste of disk space, but no harm to data.\n",
                check->leaks);
    }
    if (check->check_errors) {
        qprintf(quiet,
                "\n%" PRId64
                " internal errors have occurred during the check.\n",
                check->check_errors);
    }

    /* Both divisors below are known to be non-zero inside this branch. */
    if (check->total_clusters != 0 && check->allocated_clusters != 0) {
        double allocated_pct =
            check->allocated_clusters * 100.0 / check->total_clusters;
        double fragmented_pct =
            check->fragmented_clusters * 100.0 / check->allocated_clusters;
        double compressed_pct =
            check->compressed_clusters * 100.0 / check->allocated_clusters;

        qprintf(quiet, "%" PRId64 "/%" PRId64 " = %0.2f%% allocated, "
                "%0.2f%% fragmented, %0.2f%% compressed clusters\n",
                check->allocated_clusters, check->total_clusters,
                allocated_pct, fragmented_pct, compressed_pct);
    }

    if (check->image_end_offset) {
        qprintf(quiet,
                "Image end offset: %" PRId64 "\n", check->image_end_offset);
    }
}
608 
/*
 * Run bdrv_check() on @bs with the repair mode @fix and copy the
 * outcome into @check.
 *
 * @check receives heap copies of @filename and of the format name
 * reported by bdrv_get_format_name(), all counters from the check
 * result, and for each optional counter a has_* flag that is true
 * when the counter is non-zero.  @fmt is currently unused.
 *
 * Returns 0 on success.  If bdrv_check() fails, its negative return
 * value is passed on and @check is left unmodified.
 */
static int collect_image_check(BlockDriverState *bs,
                   ImageCheck *check,
                   const char *filename,
                   const char *fmt,
                   int fix)
{
    BdrvCheckResult result;
    int rc = bdrv_check(bs, &result, fix);

    if (rc < 0) {
        return rc;
    }

    /* Identification of the checked image */
    check->filename = g_strdup(filename);
    check->format = g_strdup(bdrv_get_format_name(bs));

    /* Counters */
    check->check_errors = result.check_errors;
    check->corruptions = result.corruptions;
    check->leaks = result.leaks;
    check->corruptions_fixed = result.corruptions_fixed;
    check->leaks_fixed = result.leaks_fixed;
    check->image_end_offset = result.image_end_offset;
    check->total_clusters = result.bfi.total_clusters;
    check->allocated_clusters = result.bfi.allocated_clusters;
    check->fragmented_clusters = result.bfi.fragmented_clusters;
    check->compressed_clusters = result.bfi.compressed_clusters;

    /* Presence flags: an optional counter is present when non-zero */
    check->has_corruptions = result.corruptions != 0;
    check->has_leaks = result.leaks != 0;
    check->has_corruptions_fixed = result.corruptions_fixed != 0;
    check->has_leaks_fixed = result.leaks_fixed != 0;
    check->has_image_end_offset = result.image_end_offset != 0;
    check->has_total_clusters = result.bfi.total_clusters != 0;
    check->has_allocated_clusters = result.bfi.allocated_clusters != 0;
    check->has_fragmented_clusters = result.bfi.fragmented_clusters != 0;
    check->has_compressed_clusters = result.bfi.compressed_clusters != 0;

    return 0;
}
647 
648 /*
649  * Checks an image for consistency. Exit codes:
650  *
651  *  0 - Check completed, image is good
652  *  1 - Check not completed because of internal errors
653  *  2 - Check completed, image is corrupted
654  *  3 - Check completed, image has leaked clusters, but is good otherwise
655  * 63 - Checks are not supported by the image format
656  */
img_check(const img_cmd_t * ccmd,int argc,char ** argv)657 static int img_check(const img_cmd_t *ccmd, int argc, char **argv)
658 {
659     int c, ret;
660     OutputFormat output_format = OFORMAT_HUMAN;
661     const char *filename, *fmt, *cache;
662     BlockBackend *blk;
663     BlockDriverState *bs;
664     int fix = 0;
665     int flags = BDRV_O_CHECK;
666     bool writethrough;
667     ImageCheck *check;
668     bool quiet = false;
669     bool image_opts = false;
670     bool force_share = false;
671 
672     fmt = NULL;
673     cache = BDRV_DEFAULT_CACHE;
674 
675     for(;;) {
676         int option_index = 0;
677         static const struct option long_options[] = {
678             {"help", no_argument, 0, 'h'},
679             {"format", required_argument, 0, 'f'},
680             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
681             {"cache", required_argument, 0, 'T'},
682             {"repair", required_argument, 0, 'r'},
683             {"force-share", no_argument, 0, 'U'},
684             {"output", required_argument, 0, OPTION_OUTPUT},
685             {"quiet", no_argument, 0, 'q'},
686             {"object", required_argument, 0, OPTION_OBJECT},
687             {0, 0, 0, 0}
688         };
689         c = getopt_long(argc, argv, "hf:T:r:Uq",
690                         long_options, &option_index);
691         if (c == -1) {
692             break;
693         }
694         switch(c) {
695         case 'h':
696             cmd_help(ccmd, "[-f FMT | --image-opts] [-T CACHE_MODE] [-r leaks|all]\n"
697 "        [-U] [--output human|json] [-q] [--object OBJDEF] FILE\n"
698 ,
699 "  -f, --format FMT\n"
700 "     specifies the format of the image explicitly (default: probing is used)\n"
701 "  --image-opts\n"
702 "     treat FILE as an option string (key=value,..), not a file name\n"
703 "     (incompatible with -f|--format)\n"
704 "  -T, --cache CACHE_MODE\n" /* why not -t ? */
705 "     cache mode (default: " BDRV_DEFAULT_CACHE ")\n"
706 "  -r, --repair leaks|all\n"
707 "     repair errors of the given category in the image (image will be\n"
708 "     opened in read-write mode, incompatible with -U|--force-share)\n"
709 "  -U, --force-share\n"
710 "     open image in shared mode for concurrent access\n"
711 "  --output human|json\n"
712 "     output format (default: human)\n"
713 "  -q, --quiet\n"
714 "     quiet mode (produce only error messages if any)\n"
715 "  --object OBJDEF\n"
716 "     defines QEMU user-creatable object\n"
717 "  FILE\n"
718 "     name of the image file, or an option string (key=value,..)\n"
719 "     with --image-opts, to operate on\n"
720 );
721             break;
722         case 'f':
723             fmt = optarg;
724             break;
725         case OPTION_IMAGE_OPTS:
726             image_opts = true;
727             break;
728         case 'T':
729             cache = optarg;
730             break;
731         case 'r':
732             flags |= BDRV_O_RDWR;
733 
734             if (!strcmp(optarg, "leaks")) {
735                 fix = BDRV_FIX_LEAKS;
736             } else if (!strcmp(optarg, "all")) {
737                 fix = BDRV_FIX_LEAKS | BDRV_FIX_ERRORS;
738             } else {
739                 error_exit(argv[0],
740                            "--repair (-r) expects 'leaks' or 'all', not '%s'",
741                            optarg);
742             }
743             break;
744         case 'U':
745             force_share = true;
746             break;
747         case OPTION_OUTPUT:
748             output_format = parse_output_format(argv[0], optarg);
749             break;
750         case 'q':
751             quiet = true;
752             break;
753         case OPTION_OBJECT:
754             user_creatable_process_cmdline(optarg);
755             break;
756         default:
757             tryhelp(argv[0]);
758         }
759     }
760     if (optind != argc - 1) {
761         error_exit(argv[0], "Expecting one image file name");
762     }
763     filename = argv[optind++];
764 
765     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
766     if (ret < 0) {
767         error_report("Invalid source cache option: %s", cache);
768         return 1;
769     }
770 
771     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
772                    force_share);
773     if (!blk) {
774         return 1;
775     }
776     bs = blk_bs(blk);
777 
778     check = g_new0(ImageCheck, 1);
779     ret = collect_image_check(bs, check, filename, fmt, fix);
780 
781     if (ret == -ENOTSUP) {
782         error_report("This image format does not support checks");
783         ret = 63;
784         goto fail;
785     }
786 
787     if (check->corruptions_fixed || check->leaks_fixed) {
788         int corruptions_fixed, leaks_fixed;
789         bool has_leaks_fixed, has_corruptions_fixed;
790 
791         leaks_fixed         = check->leaks_fixed;
792         has_leaks_fixed     = check->has_leaks_fixed;
793         corruptions_fixed   = check->corruptions_fixed;
794         has_corruptions_fixed = check->has_corruptions_fixed;
795 
796         if (output_format == OFORMAT_HUMAN) {
797             qprintf(quiet,
798                     "The following inconsistencies were found and repaired:\n\n"
799                     "    %" PRId64 " leaked clusters\n"
800                     "    %" PRId64 " corruptions\n\n"
801                     "Double checking the fixed image now...\n",
802                     check->leaks_fixed,
803                     check->corruptions_fixed);
804         }
805 
806         qapi_free_ImageCheck(check);
807         check = g_new0(ImageCheck, 1);
808         ret = collect_image_check(bs, check, filename, fmt, 0);
809 
810         check->leaks_fixed          = leaks_fixed;
811         check->has_leaks_fixed      = has_leaks_fixed;
812         check->corruptions_fixed    = corruptions_fixed;
813         check->has_corruptions_fixed = has_corruptions_fixed;
814     }
815 
816     if (!ret) {
817         switch (output_format) {
818         case OFORMAT_HUMAN:
819             dump_human_image_check(check, quiet);
820             break;
821         case OFORMAT_JSON:
822             dump_json_image_check(check, quiet);
823             break;
824         }
825     }
826 
827     if (ret || check->check_errors) {
828         if (ret) {
829             error_report("Check failed: %s", strerror(-ret));
830         } else {
831             error_report("Check failed");
832         }
833         ret = 1;
834         goto fail;
835     }
836 
837     if (check->corruptions) {
838         ret = 2;
839     } else if (check->leaks) {
840         ret = 3;
841     } else {
842         ret = 0;
843     }
844 
845 fail:
846     qapi_free_ImageCheck(check);
847     blk_unref(blk);
848     return ret;
849 }
850 
/*
 * Opaque state for common_block_job_cb(), which reports a failed job
 * through @errp.
 */
typedef struct CommonBlockJobCBInfo {
    BlockDriverState *bs;
    /* where common_block_job_cb() stores the error of a failed job */
    Error **errp;
} CommonBlockJobCBInfo;
855 
common_block_job_cb(void * opaque,int ret)856 static void common_block_job_cb(void *opaque, int ret)
857 {
858     CommonBlockJobCBInfo *cbi = opaque;
859 
860     if (ret < 0) {
861         error_setg_errno(cbi->errp, -ret, "Block job failed");
862     }
863 }
864 
/*
 * Drive @job until it has finished, printing progress along the way.
 *
 * The job's AioContext is polled until the job is either ready or
 * completed.  A job that is not yet completed at that point is
 * completed synchronously with job_complete_sync_locked(), which gets
 * @errp; for an already completed job the stored return value is
 * used.  100% progress is only printed when that result is 0.
 */
static void run_block_job(BlockJob *job, Error **errp)
{
    uint64_t progress_current, progress_total;
    AioContext *aio_context = block_job_get_aio_context(job);
    int ret = 0;

    /* Hold a reference to the job for as long as this function uses it */
    job_lock();
    job_ref_locked(&job->job);
    do {
        float progress = 0.0f;
        /* The job lock is released while polling and printing progress */
        job_unlock();
        aio_poll(aio_context, true);

        progress_get_snapshot(&job->job.progress, &progress_current,
                              &progress_total);
        /* Leave progress at 0 while the total is still 0 (no division) */
        if (progress_total) {
            progress = (float)progress_current / progress_total * 100.f;
        }
        qemu_progress_print(progress, 0);
        /* Re-take the lock: the *_locked() checks below require it */
        job_lock();
    } while (!job_is_ready_locked(&job->job) &&
             !job_is_completed_locked(&job->job));

    if (!job_is_completed_locked(&job->job)) {
        ret = job_complete_sync_locked(&job->job, errp);
    } else {
        ret = job->job.ret;
    }
    job_unref_locked(&job->job);
    job_unlock();

    /* publish completion progress only when success */
    if (!ret) {
        qemu_progress_print(100.f, 0);
    }
}
901 
img_commit(const img_cmd_t * ccmd,int argc,char ** argv)902 static int img_commit(const img_cmd_t *ccmd, int argc, char **argv)
903 {
904     int c, ret, flags;
905     const char *filename, *fmt, *cache, *base;
906     BlockBackend *blk;
907     BlockDriverState *bs, *base_bs;
908     BlockJob *job;
909     bool progress = false, quiet = false, drop = false;
910     bool writethrough;
911     Error *local_err = NULL;
912     CommonBlockJobCBInfo cbi;
913     bool image_opts = false;
914     int64_t rate_limit = 0;
915 
916     fmt = NULL;
917     cache = BDRV_DEFAULT_CACHE;
918     base = NULL;
919     for(;;) {
920         static const struct option long_options[] = {
921             {"help", no_argument, 0, 'h'},
922             {"format", required_argument, 0, 'f'},
923             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
924             {"cache", required_argument, 0, 't'},
925             {"drop", no_argument, 0, 'd'},
926             {"base", required_argument, 0, 'b'},
927             {"rate-limit", required_argument, 0, 'r'},
928             {"progress", no_argument, 0, 'p'},
929             {"quiet", no_argument, 0, 'q'},
930             {"object", required_argument, 0, OPTION_OBJECT},
931             {0, 0, 0, 0}
932         };
933         c = getopt_long(argc, argv, "hf:t:db:r:pq",
934                         long_options, NULL);
935         if (c == -1) {
936             break;
937         }
938         switch(c) {
939         case 'h':
940             cmd_help(ccmd, "[-f FMT | --image-opts] [-t CACHE_MODE] [-b BASE_IMG]\n"
941 "        [-d] [-r RATE] [-q] [--object OBJDEF] FILE\n"
942 ,
943 "  -f, --format FMT\n"
944 "     specify FILE image format explicitly (default: probing is used)\n"
945 "  --image-opts\n"
946 "     treat FILE as an option string (key=value,..), not a file name\n"
947 "     (incompatible with -f|--format)\n"
948 "  -t, --cache CACHE_MODE image cache mode (default: " BDRV_DEFAULT_CACHE ")\n"
949 "  -d, --drop\n"
950 "     skip emptying FILE on completion\n"
951 "  -b, --base BASE_IMG\n"
952 "     image in the backing chain to commit change to\n"
953 "     (default: immediate backing file; implies --drop)\n"
954 "  -r, --rate-limit RATE\n"
955 "     I/O rate limit, in bytes per second\n"
956 "  -p, --progress\n"
957 "     display progress information\n"
958 "  -q, --quiet\n"
959 "     quiet mode (produce only error messages if any)\n"
960 "  --object OBJDEF\n"
961 "     defines QEMU user-creatable object\n"
962 "  FILE\n"
963 "     name of the image file, or an option string (key=value,..)\n"
964 "     with --image-opts, to operate on\n"
965 );
966             break;
967         case 'f':
968             fmt = optarg;
969             break;
970         case OPTION_IMAGE_OPTS:
971             image_opts = true;
972             break;
973         case 't':
974             cache = optarg;
975             break;
976         case 'd':
977             drop = true;
978             break;
979         case 'b':
980             base = optarg;
981             /* -b implies -d */
982             drop = true;
983             break;
984         case 'r':
985             rate_limit = cvtnum("rate limit", optarg, true);
986             if (rate_limit < 0) {
987                 return 1;
988             }
989             break;
990         case 'p':
991             progress = true;
992             break;
993         case 'q':
994             quiet = true;
995             break;
996         case OPTION_OBJECT:
997             user_creatable_process_cmdline(optarg);
998             break;
999         default:
1000             tryhelp(argv[0]);
1001         }
1002     }
1003 
1004     /* Progress is not shown in Quiet mode */
1005     if (quiet) {
1006         progress = false;
1007     }
1008 
1009     if (optind != argc - 1) {
1010         error_exit(argv[0], "Expecting one image file name");
1011     }
1012     filename = argv[optind++];
1013 
1014     flags = BDRV_O_RDWR | BDRV_O_UNMAP;
1015     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
1016     if (ret < 0) {
1017         error_report("Invalid cache option: %s", cache);
1018         return 1;
1019     }
1020 
1021     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
1022                    false);
1023     if (!blk) {
1024         return 1;
1025     }
1026     bs = blk_bs(blk);
1027 
1028     qemu_progress_init(progress, 1.f);
1029     qemu_progress_print(0.f, 100);
1030 
1031     bdrv_graph_rdlock_main_loop();
1032     if (base) {
1033         base_bs = bdrv_find_backing_image(bs, base);
1034         if (!base_bs) {
1035             error_setg(&local_err,
1036                        "Did not find '%s' in the backing chain of '%s'",
1037                        base, filename);
1038             bdrv_graph_rdunlock_main_loop();
1039             goto done;
1040         }
1041     } else {
1042         /* This is different from QMP, which by default uses the deepest file in
1043          * the backing chain (i.e., the very base); however, the traditional
1044          * behavior of qemu-img commit is using the immediate backing file. */
1045         base_bs = bdrv_backing_chain_next(bs);
1046         if (!base_bs) {
1047             error_setg(&local_err, "Image does not have a backing file");
1048             bdrv_graph_rdunlock_main_loop();
1049             goto done;
1050         }
1051     }
1052     bdrv_graph_rdunlock_main_loop();
1053 
1054     cbi = (CommonBlockJobCBInfo){
1055         .errp = &local_err,
1056         .bs   = bs,
1057     };
1058 
1059     commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit,
1060                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
1061                         &cbi, false, &local_err);
1062     if (local_err) {
1063         goto done;
1064     }
1065 
1066     /* When the block job completes, the BlockBackend reference will point to
1067      * the old backing file. In order to avoid that the top image is already
1068      * deleted, so we can still empty it afterwards, increment the reference
1069      * counter here preemptively. */
1070     if (!drop) {
1071         bdrv_ref(bs);
1072     }
1073 
1074     job = block_job_get("commit");
1075     assert(job);
1076     run_block_job(job, &local_err);
1077     if (local_err) {
1078         goto unref_backing;
1079     }
1080 
1081     if (!drop) {
1082         BlockBackend *old_backing_blk;
1083 
1084         old_backing_blk = blk_new_with_bs(bs, BLK_PERM_WRITE, BLK_PERM_ALL,
1085                                           &local_err);
1086         if (!old_backing_blk) {
1087             goto unref_backing;
1088         }
1089         ret = blk_make_empty(old_backing_blk, &local_err);
1090         blk_unref(old_backing_blk);
1091         if (ret == -ENOTSUP) {
1092             error_free(local_err);
1093             local_err = NULL;
1094         } else if (ret < 0) {
1095             goto unref_backing;
1096         }
1097     }
1098 
1099 unref_backing:
1100     if (!drop) {
1101         bdrv_unref(bs);
1102     }
1103 
1104 done:
1105     qemu_progress_end();
1106 
1107     /*
1108      * Manually inactivate the image first because this way we can know whether
1109      * an error occurred. blk_unref() doesn't tell us about failures.
1110      */
1111     ret = bdrv_inactivate_all();
1112     if (ret < 0 && !local_err) {
1113         error_setg_errno(&local_err, -ret, "Error while closing the image");
1114     }
1115     blk_unref(blk);
1116 
1117     if (local_err) {
1118         error_report_err(local_err);
1119         return 1;
1120     }
1121 
1122     qprintf(quiet, "Image committed.\n");
1123     return 0;
1124 }
1125 
1126 /*
1127  * Returns -1 if 'buf' contains only zeroes, otherwise the byte index
1128  * of the first sector boundary within buf where the sector contains a
1129  * non-zero byte.  This function is robust to a buffer that is not
1130  * sector-aligned.
1131  */
find_nonzero(const uint8_t * buf,int64_t n)1132 static int64_t find_nonzero(const uint8_t *buf, int64_t n)
1133 {
1134     int64_t i;
1135     int64_t end = QEMU_ALIGN_DOWN(n, BDRV_SECTOR_SIZE);
1136 
1137     for (i = 0; i < end; i += BDRV_SECTOR_SIZE) {
1138         if (!buffer_is_zero(buf + i, BDRV_SECTOR_SIZE)) {
1139             return i;
1140         }
1141     }
1142     if (i < n && !buffer_is_zero(buf + i, n - end)) {
1143         return i;
1144     }
1145     return -1;
1146 }
1147 
1148 /*
1149  * Returns true iff the first sector pointed to by 'buf' contains at least
1150  * a non-NUL byte.
1151  *
1152  * 'pnum' is set to the number of sectors (including and immediately following
1153  * the first one) that are known to be in the same allocated/unallocated state.
1154  * The function will try to align the end offset to alignment boundaries so
1155  * that the request will at least end aligned and consecutive requests will
1156  * also start at an aligned offset.
1157  */
is_allocated_sectors(const uint8_t * buf,int n,int * pnum,int64_t sector_num,int alignment)1158 static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
1159                                 int64_t sector_num, int alignment)
1160 {
1161     bool is_zero;
1162     int i, tail;
1163 
1164     if (n <= 0) {
1165         *pnum = 0;
1166         return 0;
1167     }
1168     is_zero = buffer_is_zero(buf, BDRV_SECTOR_SIZE);
1169     for(i = 1; i < n; i++) {
1170         buf += BDRV_SECTOR_SIZE;
1171         if (is_zero != buffer_is_zero(buf, BDRV_SECTOR_SIZE)) {
1172             break;
1173         }
1174     }
1175 
1176     if (i == n) {
1177         /*
1178          * The whole buf is the same.
1179          * No reason to split it into chunks, so return now.
1180          */
1181         *pnum = i;
1182         return !is_zero;
1183     }
1184 
1185     tail = (sector_num + i) & (alignment - 1);
1186     if (tail) {
1187         if (is_zero && i <= tail) {
1188             /*
1189              * For sure next sector after i is data, and it will rewrite this
1190              * tail anyway due to RMW. So, let's just write data now.
1191              */
1192             is_zero = false;
1193         }
1194         if (!is_zero) {
1195             /* If possible, align up end offset of allocated areas. */
1196             i += alignment - tail;
1197             i = MIN(i, n);
1198         } else {
1199             /*
1200              * For sure next sector after i is data, and it will rewrite this
1201              * tail anyway due to RMW. Better is avoid RMW and write zeroes up
1202              * to aligned bound.
1203              */
1204             i -= tail;
1205         }
1206     }
1207     *pnum = i;
1208     return !is_zero;
1209 }
1210 
1211 /*
1212  * Like is_allocated_sectors, but if the buffer starts with a used sector,
1213  * up to 'min' consecutive sectors containing zeros are ignored. This avoids
1214  * breaking up write requests for only small sparse areas.
1215  */
is_allocated_sectors_min(const uint8_t * buf,int n,int * pnum,int min,int64_t sector_num,int alignment)1216 static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
1217     int min, int64_t sector_num, int alignment)
1218 {
1219     int ret;
1220     int num_checked, num_used;
1221 
1222     if (n < min) {
1223         min = n;
1224     }
1225 
1226     ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1227     if (!ret) {
1228         return ret;
1229     }
1230 
1231     num_used = *pnum;
1232     buf += BDRV_SECTOR_SIZE * *pnum;
1233     n -= *pnum;
1234     sector_num += *pnum;
1235     num_checked = num_used;
1236 
1237     while (n > 0) {
1238         ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
1239 
1240         buf += BDRV_SECTOR_SIZE * *pnum;
1241         n -= *pnum;
1242         sector_num += *pnum;
1243         num_checked += *pnum;
1244         if (ret) {
1245             num_used = num_checked;
1246         } else if (*pnum >= min) {
1247             break;
1248         }
1249     }
1250 
1251     *pnum = num_used;
1252     return 1;
1253 }
1254 
1255 /*
1256  * Compares two buffers chunk by chunk, where @chsize is the chunk size.
1257  * If @chsize is 0, default chunk size of BDRV_SECTOR_SIZE is used.
1258  * Returns 0 if the first chunk of each buffer matches, non-zero otherwise.
1259  *
1260  * @pnum is set to the size of the buffer prefix aligned to @chsize that
1261  * has the same matching status as the first chunk.
1262  */
compare_buffers(const uint8_t * buf1,const uint8_t * buf2,int64_t bytes,uint64_t chsize,int64_t * pnum)1263 static int compare_buffers(const uint8_t *buf1, const uint8_t *buf2,
1264                            int64_t bytes, uint64_t chsize, int64_t *pnum)
1265 {
1266     bool res;
1267     int64_t i;
1268 
1269     assert(bytes > 0);
1270 
1271     if (!chsize) {
1272         chsize = BDRV_SECTOR_SIZE;
1273     }
1274     i = MIN(bytes, chsize);
1275 
1276     res = !!memcmp(buf1, buf2, i);
1277     while (i < bytes) {
1278         int64_t len = MIN(bytes - i, chsize);
1279 
1280         if (!!memcmp(buf1 + i, buf2 + i, len) != res) {
1281             break;
1282         }
1283         i += len;
1284     }
1285 
1286     *pnum = i;
1287     return res;
1288 }
1289 
/*
 * Size of the I/O buffers; img_compare() allocates its two read buffers
 * with this size and never reads more than this much in one request.
 */
#define IO_BUF_SIZE (2 * MiB)
1291 
1292 /*
1293  * Check if passed sectors are empty (not allocated or contain only 0 bytes)
1294  *
1295  * Intended for use by 'qemu-img compare': Returns 0 in case sectors are
1296  * filled with 0, 1 if sectors contain non-zero data (this is a comparison
1297  * failure), and 4 on error (the exit status for read errors), after emitting
1298  * an error message.
1299  *
1300  * @param blk:  BlockBackend for the image
1301  * @param offset: Starting offset to check
1302  * @param bytes: Number of bytes to check
1303  * @param filename: Name of disk file we are checking (logging purpose)
1304  * @param buffer: Allocated buffer for storing read data
1305  * @param quiet: Flag for quiet mode
1306  */
check_empty_sectors(BlockBackend * blk,int64_t offset,int64_t bytes,const char * filename,uint8_t * buffer,bool quiet)1307 static int check_empty_sectors(BlockBackend *blk, int64_t offset,
1308                                int64_t bytes, const char *filename,
1309                                uint8_t *buffer, bool quiet)
1310 {
1311     int ret = 0;
1312     int64_t idx;
1313 
1314     ret = blk_pread(blk, offset, bytes, buffer, 0);
1315     if (ret < 0) {
1316         error_report("Error while reading offset %" PRId64 " of %s: %s",
1317                      offset, filename, strerror(-ret));
1318         return 4;
1319     }
1320     idx = find_nonzero(buffer, bytes);
1321     if (idx >= 0) {
1322         qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
1323                 offset + idx);
1324         return 1;
1325     }
1326 
1327     return 0;
1328 }
1329 
/*
 * Implementation of 'qemu-img compare': compares two images. Exit codes:
 *
 * 0 - Images are identical or the requested help was printed
 * 1 - Images differ
 * >1 - Error occurred
 *
 * The error codes produced in this function are:
 * 2 - invalid cache mode, or one of the images could not be opened
 * 3 - querying the block status of an image failed
 * 4 - an image's size could not be determined, or reading from it failed
 *
 * The part common to both images is walked range by range using their block
 * status, so that data is only read where at least one side is allocated.
 * If the images differ in size, the excess of the larger one must read as
 * zeroes for the images to count as identical.  In strict mode (-s) a size
 * difference or a block status difference is already a mismatch.
 *
 * @ccmd: descriptor of the sub-command, passed on to cmd_help()
 * @argc: number of entries in @argv
 * @argv: sub-command arguments; argv[0] is used in usage/error messages
 */
static int img_compare(const img_cmd_t *ccmd, int argc, char **argv)
{
    const char *fmt1 = NULL, *fmt2 = NULL, *cache, *filename1, *filename2;
    BlockBackend *blk1, *blk2;
    BlockDriverState *bs1, *bs2;
    int64_t total_size1, total_size2;
    uint8_t *buf1 = NULL, *buf2 = NULL;
    int64_t pnum1, pnum2;
    int allocated1, allocated2;
    int ret = 0; /* return value - 0 Ident, 1 Different, >1 Error */
    bool progress = false, quiet = false, strict = false;
    int flags;
    bool writethrough;
    int64_t total_size;
    int64_t offset = 0;
    int64_t chunk;
    int c;
    uint64_t progress_base;
    bool image_opts = false;
    bool force_share = false;

    cache = BDRV_DEFAULT_CACHE;
    for (;;) {
        static const struct option long_options[] = {
            {"help", no_argument, 0, 'h'},
            {"a-format", required_argument, 0, 'f'},
            {"b-format", required_argument, 0, 'F'},
            {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
            {"strict", no_argument, 0, 's'},
            {"cache", required_argument, 0, 'T'},
            {"force-share", no_argument, 0, 'U'},
            {"progress", no_argument, 0, 'p'},
            {"quiet", no_argument, 0, 'q'},
            {"object", required_argument, 0, OPTION_OBJECT},
            {0, 0, 0, 0}
        };
        c = getopt_long(argc, argv, "hf:F:sT:Upq",
                        long_options, NULL);
        if (c == -1) {
            break;
        }
        switch (c) {
        case 'h':
            cmd_help(ccmd,
"[[-f FMT] [-F FMT] | --image-opts] [-s] [-T CACHE]\n"
"        [-U] [-p] [-q] [--object OBJDEF] FILE1 FILE2\n"
,
"  -f, --a-format FMT\n"
"     specify FILE1 image format explicitly (default: probing is used)\n"
"  -F, --b-format FMT\n"
"     specify FILE2 image format explicitly (default: probing is used)\n"
"  --image-opts\n"
"     treat FILE1 and FILE2 as option strings (key=value,..), not file names\n"
"     (incompatible with -f|--a-format and -F|--b-format)\n"
"  -s, --strict\n"
"     strict mode, also check if sizes are equal\n"
"  -T, --cache CACHE_MODE\n"
"     images caching mode (default: " BDRV_DEFAULT_CACHE ")\n"
"  -U, --force-share\n"
"     open images in shared mode for concurrent access\n"
"  -p, --progress\n"
"     display progress information\n"
"  -q, --quiet\n"
"     quiet mode (produce only error messages if any)\n"
"  --object OBJDEF\n"
"     defines QEMU user-creatable object\n"
"  FILE1, FILE2\n"
"     names of the image files, or option strings (key=value,..)\n"
"     with --image-opts, to compare\n"
);
            break;
        case 'f':
            fmt1 = optarg;
            break;
        case 'F':
            fmt2 = optarg;
            break;
        case OPTION_IMAGE_OPTS:
            image_opts = true;
            break;
        case 's':
            strict = true;
            break;
        case 'T':
            cache = optarg;
            break;
        case 'U':
            force_share = true;
            break;
        case 'p':
            progress = true;
            break;
        case 'q':
            quiet = true;
            break;
        case OPTION_OBJECT:
            user_creatable_process_cmdline(optarg);
            break;
        default:
            tryhelp(argv[0]);
        }
    }

    /* Progress is not shown in Quiet mode */
    if (quiet) {
        progress = false;
    }


    if (optind != argc - 2) {
        error_exit(argv[0], "Expecting two image file names");
    }
    filename1 = argv[optind++];
    filename2 = argv[optind++];

    /* Initialize before goto out */
    qemu_progress_init(progress, 2.0);

    /* The same cache mode is applied to both images */
    flags = 0;
    ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
    if (ret < 0) {
        error_report("Invalid source cache option: %s", cache);
        ret = 2;
        goto out3;
    }

    blk1 = img_open(image_opts, filename1, fmt1, flags, writethrough, quiet,
                    force_share);
    if (!blk1) {
        ret = 2;
        goto out3;
    }

    blk2 = img_open(image_opts, filename2, fmt2, flags, writethrough, quiet,
                    force_share);
    if (!blk2) {
        ret = 2;
        goto out2;
    }
    bs1 = blk_bs(blk1);
    bs2 = blk_bs(blk2);

    buf1 = blk_blockalign(blk1, IO_BUF_SIZE);
    buf2 = blk_blockalign(blk2, IO_BUF_SIZE);
    total_size1 = blk_getlength(blk1);
    if (total_size1 < 0) {
        error_report("Can't get size of %s: %s",
                     filename1, strerror(-total_size1));
        ret = 4;
        goto out;
    }
    total_size2 = blk_getlength(blk2);
    if (total_size2 < 0) {
        error_report("Can't get size of %s: %s",
                     filename2, strerror(-total_size2));
        ret = 4;
        goto out;
    }
    /*
     * total_size is the length both images have in common; progress_base is
     * the length of the larger image and is the 100% mark for progress.
     */
    total_size = MIN(total_size1, total_size2);
    progress_base = MAX(total_size1, total_size2);

    qemu_progress_print(0, 100);

    if (strict && total_size1 != total_size2) {
        ret = 1;
        qprintf(quiet, "Strict mode: Image size mismatch!\n");
        goto out;
    }

    /* Pass 1: compare the range that exists in both images */
    while (offset < total_size) {
        int status1, status2;

        status1 = bdrv_block_status_above(bs1, NULL, offset,
                                          total_size1 - offset, &pnum1, NULL,
                                          NULL);
        if (status1 < 0) {
            ret = 3;
            error_report("Sector allocation test failed for %s", filename1);
            goto out;
        }
        allocated1 = status1 & BDRV_BLOCK_ALLOCATED;

        status2 = bdrv_block_status_above(bs2, NULL, offset,
                                          total_size2 - offset, &pnum2, NULL,
                                          NULL);
        if (status2 < 0) {
            ret = 3;
            error_report("Sector allocation test failed for %s", filename2);
            goto out;
        }
        allocated2 = status2 & BDRV_BLOCK_ALLOCATED;

        /* Only look at the part for which both status results are valid */
        assert(pnum1 && pnum2);
        chunk = MIN(pnum1, pnum2);

        if (strict) {
            if (status1 != status2) {
                ret = 1;
                qprintf(quiet, "Strict mode: Offset %" PRId64
                        " block status mismatch!\n", offset);
                goto out;
            }
        }
        if ((status1 & BDRV_BLOCK_ZERO) && (status2 & BDRV_BLOCK_ZERO)) {
            /* nothing to do */
        } else if (allocated1 == allocated2) {
            /*
             * Same allocation state on both sides: if both are allocated,
             * read and compare the data; if neither is, there is nothing to
             * compare.
             */
            if (allocated1) {
                int64_t pnum;

                chunk = MIN(chunk, IO_BUF_SIZE);
                ret = blk_pread(blk1, offset, chunk, buf1, 0);
                if (ret < 0) {
                    error_report("Error while reading offset %" PRId64
                                 " of %s: %s",
                                 offset, filename1, strerror(-ret));
                    ret = 4;
                    goto out;
                }
                ret = blk_pread(blk2, offset, chunk, buf2, 0);
                if (ret < 0) {
                    error_report("Error while reading offset %" PRId64
                                 " of %s: %s",
                                 offset, filename2, strerror(-ret));
                    ret = 4;
                    goto out;
                }
                ret = compare_buffers(buf1, buf2, chunk, 0, &pnum);
                if (ret || pnum != chunk) {
                    /*
                     * Either the very first chunk differs (ret != 0), or the
                     * first difference starts pnum bytes into the range.
                     */
                    qprintf(quiet, "Content mismatch at offset %" PRId64 "!\n",
                            offset + (ret ? 0 : pnum));
                    ret = 1;
                    goto out;
                }
            }
        } else {
            /*
             * Only one side is allocated: that side must read as zeroes.
             * buf1 serves as the scratch buffer for either image.
             */
            chunk = MIN(chunk, IO_BUF_SIZE);
            if (allocated1) {
                ret = check_empty_sectors(blk1, offset, chunk,
                                          filename1, buf1, quiet);
            } else {
                ret = check_empty_sectors(blk2, offset, chunk,
                                          filename2, buf1, quiet);
            }
            if (ret) {
                goto out;
            }
        }
        offset += chunk;
        qemu_progress_print(((float) chunk / progress_base) * 100, 100);
    }

    /* Pass 2: the excess of the larger image must consist of zeroes */
    if (total_size1 != total_size2) {
        BlockBackend *blk_over;
        const char *filename_over;

        qprintf(quiet, "Warning: Image size mismatch!\n");
        if (total_size1 > total_size2) {
            blk_over = blk1;
            filename_over = filename1;
        } else {
            blk_over = blk2;
            filename_over = filename2;
        }

        while (offset < progress_base) {
            ret = bdrv_block_status_above(blk_bs(blk_over), NULL, offset,
                                          progress_base - offset, &chunk,
                                          NULL, NULL);
            if (ret < 0) {
                ret = 3;
                error_report("Sector allocation test failed for %s",
                             filename_over);
                goto out;

            }
            /* Data has to be read only if it is not already known to be zero */
            if (ret & BDRV_BLOCK_ALLOCATED && !(ret & BDRV_BLOCK_ZERO)) {
                chunk = MIN(chunk, IO_BUF_SIZE);
                ret = check_empty_sectors(blk_over, offset, chunk,
                                          filename_over, buf1, quiet);
                if (ret) {
                    goto out;
                }
            }
            offset += chunk;
            qemu_progress_print(((float) chunk / progress_base) * 100, 100);
        }
    }

    qprintf(quiet, "Images are identical.\n");
    ret = 0;

    /* Cleanup labels release resources in reverse order of acquisition */
out:
    qemu_vfree(buf1);
    qemu_vfree(buf2);
    blk_unref(blk2);
out2:
    blk_unref(blk1);
out3:
    qemu_progress_end();
    return ret;
}
1638 
1639 /* Convenience wrapper around qmp_block_dirty_bitmap_merge */
do_dirty_bitmap_merge(const char * dst_node,const char * dst_name,const char * src_node,const char * src_name,Error ** errp)1640 static void do_dirty_bitmap_merge(const char *dst_node, const char *dst_name,
1641                                   const char *src_node, const char *src_name,
1642                                   Error **errp)
1643 {
1644     BlockDirtyBitmapOrStr *merge_src;
1645     BlockDirtyBitmapOrStrList *list = NULL;
1646 
1647     merge_src = g_new0(BlockDirtyBitmapOrStr, 1);
1648     merge_src->type = QTYPE_QDICT;
1649     merge_src->u.external.node = g_strdup(src_node);
1650     merge_src->u.external.name = g_strdup(src_name);
1651     QAPI_LIST_PREPEND(list, merge_src);
1652     qmp_block_dirty_bitmap_merge(dst_node, dst_name, list, errp);
1653     qapi_free_BlockDirtyBitmapOrStrList(list);
1654 }
1655 
/*
 * Classification of a source range during conversion, as chosen by
 * convert_iteration_sectors() from the block status of the source.
 */
enum ImgConvertBlockStatus {
    /*
     * Status reports data; also used for ranges reporting neither data nor
     * zeroes when the target has no backing file, and for ranges shorter
     * than a cluster on compressed targets.
     */
    BLK_DATA,
    /* Status reports zeroes (and the range is not past the target backing) */
    BLK_ZERO,
    /*
     * Neither data nor zeroes is reported and the target has a backing
     * file, or zeroes are reported past the end of the target's backing
     * file.
     */
    BLK_BACKING_FILE,
};
1661 
/* Capacity of the per-coroutine arrays (co[], wait_sector_num[]) in ImgConvertState */
#define MAX_COROUTINES 16
/* NOTE(review): presumably the throttle group name used by convert; its use is not visible here */
#define CONVERT_THROTTLE_GROUP "img_convert"
1664 
/*
 * State of one image conversion.
 *
 * The sources are treated as one concatenated image: a global sector number
 * is mapped to a source index and an offset inside it by
 * convert_select_part().  Field notes below only describe what the helpers
 * convert_select_part() and convert_iteration_sectors() rely on.
 */
typedef struct ImgConvertState {
    BlockBackend **src;             /* source images, indexed by source number */
    int64_t *src_sectors;           /* length of each source, in sectors */
    int *src_alignment;             /* per-source alignment, in sectors, for
                                     * block status boundaries */
    int src_num;                    /* number of entries in the src arrays */
    int64_t total_sectors;          /* sectors to convert; iteration sector
                                     * numbers must stay below this */
    int64_t allocated_sectors;
    int64_t allocated_done;
    int64_t sector_num;
    int64_t wr_offs;
    enum ImgConvertBlockStatus status;  /* cached status of the current range */
    int64_t sector_next_status;     /* first sector for which 'status' is no
                                     * longer valid and must be queried again */
    BlockBackend *target;
    bool has_zero_init;
    bool compressed;                /* requests get rounded to whole clusters */
    bool target_is_new;
    bool target_has_backing;        /* ranges without data or zeroes map to
                                     * BLK_BACKING_FILE */
    int64_t target_backing_sectors; /* negative if unknown */
    bool wr_in_order;
    bool copy_range;
    bool salvage;                   /* keep going on block status errors */
    bool quiet;                     /* suppress warnings while salvaging */
    int min_sparse;
    int alignment;
    size_t cluster_sectors;         /* cluster size used when 'compressed' */
    size_t buf_sectors;             /* upper bound for one BLK_DATA request */
    long num_coroutines;
    int running_coroutines;
    Coroutine *co[MAX_COROUTINES];
    int64_t wait_sector_num[MAX_COROUTINES];
    CoMutex lock;
    int ret;
} ImgConvertState;
1698 
/*
 * Map a sector number of the concatenated source onto one source image.
 *
 * On return *@src_cur is the index of the source containing @sector_num and
 * *@src_cur_offset is the number of the first sector of that source within
 * the concatenation.  Asserts that @sector_num lies inside the sources.
 */
static void convert_select_part(ImgConvertState *s, int64_t sector_num,
                                int *src_cur, int64_t *src_cur_offset)
{
    int64_t start = 0;
    int idx = 0;

    /* Skip every source that ends at or before the requested sector */
    while (sector_num - start >= s->src_sectors[idx]) {
        start += s->src_sectors[idx];
        idx++;
        assert(idx < s->src_num);
    }

    *src_cur = idx;
    *src_cur_offset = start;
}
1710 
/*
 * Determine how many sectors, starting at @sector_num, can be handled in a
 * single step of the conversion, and how they have to be treated.
 *
 * The treatment is stored in s->status.  It is refreshed from the block
 * status of the source whenever @sector_num has reached
 * s->sector_next_status, which is then advanced to the end of the range the
 * new status applies to.
 *
 * Returns the number of sectors for this iteration, or the negative value
 * returned by the block status query if that failed and salvaging is
 * disabled (an error has been reported in that case).
 */
static int coroutine_mixed_fn GRAPH_RDLOCK
convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
{
    int64_t src_cur_offset;
    int ret, n, src_cur;
    bool post_backing_zero = false;

    convert_select_part(s, sector_num, &src_cur, &src_cur_offset);

    assert(s->total_sectors > sector_num);
    n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);

    if (s->target_backing_sectors >= 0) {
        if (sector_num >= s->target_backing_sectors) {
            post_backing_zero = true;
        } else if (sector_num + n > s->target_backing_sectors) {
            /* Split requests around target_backing_sectors (because
             * starting from there, zeros are handled differently) */
            n = s->target_backing_sectors - sector_num;
        }
    }

    /* The cached status is used up: query the source again */
    if (s->sector_next_status <= sector_num) {
        uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
        int64_t count;
        int tail;
        BlockDriverState *src_bs = blk_bs(s->src[src_cur]);
        BlockDriverState *base;

        /*
         * With a backing file on the target, the status query is limited to
         * the layers above 'base'; otherwise the whole chain is considered.
         */
        if (s->target_has_backing) {
            base = bdrv_cow_bs(bdrv_skip_filters(src_bs));
        } else {
            base = NULL;
        }

        /*
         * Query the status of up to n sectors.  In salvage mode a failing
         * query is retried on a quarter of the range until a single sector
         * is left, which is then simply assumed to contain data.
         */
        do {
            count = n * BDRV_SECTOR_SIZE;

            ret = bdrv_block_status_above(src_bs, base, offset, count, &count,
                                          NULL, NULL);

            if (ret < 0) {
                if (s->salvage) {
                    if (n == 1) {
                        if (!s->quiet) {
                            warn_report("error while reading block status at "
                                        "offset %" PRIu64 ": %s", offset,
                                        strerror(-ret));
                        }
                        /* Just try to read the data, then */
                        ret = BDRV_BLOCK_DATA;
                        count = BDRV_SECTOR_SIZE;
                    } else {
                        /* Retry on a shorter range */
                        n = DIV_ROUND_UP(n, 4);
                    }
                } else {
                    error_report("error while reading block status at offset "
                                 "%" PRIu64 ": %s", offset, strerror(-ret));
                    return ret;
                }
            }
        } while (ret < 0);

        /* Length in sectors of the range the returned status applies to */
        n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);

        /*
         * Avoid that s->sector_next_status becomes unaligned to the source
         * request alignment and/or cluster size to avoid unnecessary read
         * cycles.
         */
        tail = (sector_num - src_cur_offset + n) % s->src_alignment[src_cur];
        if (n > tail) {
            n -= tail;
        }

        /* Translate the block status flags into the conversion status */
        if (ret & BDRV_BLOCK_ZERO) {
            s->status = post_backing_zero ? BLK_BACKING_FILE : BLK_ZERO;
        } else if (ret & BDRV_BLOCK_DATA) {
            s->status = BLK_DATA;
        } else {
            s->status = s->target_has_backing ? BLK_BACKING_FILE : BLK_DATA;
        }

        s->sector_next_status = sector_num + n;
    }

    /* Never go past the range the cached status is valid for */
    n = MIN(n, s->sector_next_status - sector_num);
    if (s->status == BLK_DATA) {
        /* Data requests are further limited to s->buf_sectors */
        n = MIN(n, s->buf_sectors);
    }

    /* We need to write complete clusters for compressed images, so if an
     * unallocated area is shorter than that, we must consider the whole
     * cluster allocated. */
    if (s->compressed) {
        if (n < s->cluster_sectors) {
            n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
            s->status = BLK_DATA;
        } else {
            n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
        }
    }

    return n;
}
1817 
convert_co_read(ImgConvertState * s,int64_t sector_num,int nb_sectors,uint8_t * buf)1818 static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num,
1819                                         int nb_sectors, uint8_t *buf)
1820 {
1821     uint64_t single_read_until = 0;
1822     int n, ret;
1823 
1824     assert(nb_sectors <= s->buf_sectors);
1825     while (nb_sectors > 0) {
1826         BlockBackend *blk;
1827         int src_cur;
1828         int64_t bs_sectors, src_cur_offset;
1829         uint64_t offset;
1830 
1831         /* In the case of compression with multiple source files, we can get a
1832          * nb_sectors that spreads into the next part. So we must be able to
1833          * read across multiple BDSes for one convert_read() call. */
1834         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1835         blk = s->src[src_cur];
1836         bs_sectors = s->src_sectors[src_cur];
1837 
1838         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1839 
1840         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1841         if (single_read_until > offset) {
1842             n = 1;
1843         }
1844 
1845         ret = blk_co_pread(blk, offset, n << BDRV_SECTOR_BITS, buf, 0);
1846         if (ret < 0) {
1847             if (s->salvage) {
1848                 if (n > 1) {
1849                     single_read_until = offset + (n << BDRV_SECTOR_BITS);
1850                     continue;
1851                 } else {
1852                     if (!s->quiet) {
1853                         warn_report("error while reading offset %" PRIu64
1854                                     ": %s", offset, strerror(-ret));
1855                     }
1856                     memset(buf, 0, BDRV_SECTOR_SIZE);
1857                 }
1858             } else {
1859                 return ret;
1860             }
1861         }
1862 
1863         sector_num += n;
1864         nb_sectors -= n;
1865         buf += n * BDRV_SECTOR_SIZE;
1866     }
1867 
1868     return 0;
1869 }
1870 
1871 
convert_co_write(ImgConvertState * s,int64_t sector_num,int nb_sectors,uint8_t * buf,enum ImgConvertBlockStatus status)1872 static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
1873                                          int nb_sectors, uint8_t *buf,
1874                                          enum ImgConvertBlockStatus status)
1875 {
1876     int ret;
1877 
1878     while (nb_sectors > 0) {
1879         int n = nb_sectors;
1880         BdrvRequestFlags flags = s->compressed ? BDRV_REQ_WRITE_COMPRESSED : 0;
1881 
1882         switch (status) {
1883         case BLK_BACKING_FILE:
1884             /* If we have a backing file, leave clusters unallocated that are
1885              * unallocated in the source image, so that the backing file is
1886              * visible at the respective offset. */
1887             assert(s->target_has_backing);
1888             break;
1889 
1890         case BLK_DATA:
1891             /* If we're told to keep the target fully allocated (-S 0) or there
1892              * is real non-zero data, we must write it. Otherwise we can treat
1893              * it as zero sectors.
1894              * Compressed clusters need to be written as a whole, so in that
1895              * case we can only save the write if the buffer is completely
1896              * zeroed. */
1897             if (!s->min_sparse ||
1898                 (!s->compressed &&
1899                  is_allocated_sectors_min(buf, n, &n, s->min_sparse,
1900                                           sector_num, s->alignment)) ||
1901                 (s->compressed &&
1902                  !buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
1903             {
1904                 ret = blk_co_pwrite(s->target, sector_num << BDRV_SECTOR_BITS,
1905                                     n << BDRV_SECTOR_BITS, buf, flags);
1906                 if (ret < 0) {
1907                     return ret;
1908                 }
1909                 break;
1910             }
1911             /* fall-through */
1912 
1913         case BLK_ZERO:
1914             if (s->has_zero_init) {
1915                 assert(!s->target_has_backing);
1916                 break;
1917             }
1918             ret = blk_co_pwrite_zeroes(s->target,
1919                                        sector_num << BDRV_SECTOR_BITS,
1920                                        n << BDRV_SECTOR_BITS,
1921                                        BDRV_REQ_MAY_UNMAP);
1922             if (ret < 0) {
1923                 return ret;
1924             }
1925             break;
1926         }
1927 
1928         sector_num += n;
1929         nb_sectors -= n;
1930         buf += n * BDRV_SECTOR_SIZE;
1931     }
1932 
1933     return 0;
1934 }
1935 
convert_co_copy_range(ImgConvertState * s,int64_t sector_num,int nb_sectors)1936 static int coroutine_fn convert_co_copy_range(ImgConvertState *s, int64_t sector_num,
1937                                               int nb_sectors)
1938 {
1939     int n, ret;
1940 
1941     while (nb_sectors > 0) {
1942         BlockBackend *blk;
1943         int src_cur;
1944         int64_t bs_sectors, src_cur_offset;
1945         int64_t offset;
1946 
1947         convert_select_part(s, sector_num, &src_cur, &src_cur_offset);
1948         offset = (sector_num - src_cur_offset) << BDRV_SECTOR_BITS;
1949         blk = s->src[src_cur];
1950         bs_sectors = s->src_sectors[src_cur];
1951 
1952         n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset));
1953 
1954         ret = blk_co_copy_range(blk, offset, s->target,
1955                                 sector_num << BDRV_SECTOR_BITS,
1956                                 n << BDRV_SECTOR_BITS, 0, 0);
1957         if (ret < 0) {
1958             return ret;
1959         }
1960 
1961         sector_num += n;
1962         nb_sectors -= n;
1963     }
1964     return 0;
1965 }
1966 
convert_co_do_copy(void * opaque)1967 static void coroutine_fn convert_co_do_copy(void *opaque)
1968 {
1969     ImgConvertState *s = opaque;
1970     uint8_t *buf = NULL;
1971     int ret, i;
1972     int index = -1;
1973 
1974     for (i = 0; i < s->num_coroutines; i++) {
1975         if (s->co[i] == qemu_coroutine_self()) {
1976             index = i;
1977             break;
1978         }
1979     }
1980     assert(index >= 0);
1981 
1982     s->running_coroutines++;
1983     buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
1984 
1985     while (1) {
1986         int n;
1987         int64_t sector_num;
1988         enum ImgConvertBlockStatus status;
1989         bool copy_range;
1990 
1991         qemu_co_mutex_lock(&s->lock);
1992         if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) {
1993             qemu_co_mutex_unlock(&s->lock);
1994             break;
1995         }
1996         WITH_GRAPH_RDLOCK_GUARD() {
1997             n = convert_iteration_sectors(s, s->sector_num);
1998         }
1999         if (n < 0) {
2000             qemu_co_mutex_unlock(&s->lock);
2001             s->ret = n;
2002             break;
2003         }
2004         /* save current sector and allocation status to local variables */
2005         sector_num = s->sector_num;
2006         status = s->status;
2007         if (!s->min_sparse && s->status == BLK_ZERO) {
2008             n = MIN(n, s->buf_sectors);
2009         }
2010         /* increment global sector counter so that other coroutines can
2011          * already continue reading beyond this request */
2012         s->sector_num += n;
2013         qemu_co_mutex_unlock(&s->lock);
2014 
2015         if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) {
2016             s->allocated_done += n;
2017             qemu_progress_print(100.0 * s->allocated_done /
2018                                         s->allocated_sectors, 0);
2019         }
2020 
2021 retry:
2022         copy_range = s->copy_range && s->status == BLK_DATA;
2023         if (status == BLK_DATA && !copy_range) {
2024             ret = convert_co_read(s, sector_num, n, buf);
2025             if (ret < 0) {
2026                 error_report("error while reading at byte %lld: %s",
2027                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2028                 s->ret = ret;
2029             }
2030         } else if (!s->min_sparse && status == BLK_ZERO) {
2031             status = BLK_DATA;
2032             memset(buf, 0x00, n * BDRV_SECTOR_SIZE);
2033         }
2034 
2035         if (s->wr_in_order) {
2036             /* keep writes in order */
2037             while (s->wr_offs != sector_num && s->ret == -EINPROGRESS) {
2038                 s->wait_sector_num[index] = sector_num;
2039                 qemu_coroutine_yield();
2040             }
2041             s->wait_sector_num[index] = -1;
2042         }
2043 
2044         if (s->ret == -EINPROGRESS) {
2045             if (copy_range) {
2046                 WITH_GRAPH_RDLOCK_GUARD() {
2047                     ret = convert_co_copy_range(s, sector_num, n);
2048                 }
2049                 if (ret) {
2050                     s->copy_range = false;
2051                     goto retry;
2052                 }
2053             } else {
2054                 ret = convert_co_write(s, sector_num, n, buf, status);
2055             }
2056             if (ret < 0) {
2057                 error_report("error while writing at byte %lld: %s",
2058                              sector_num * BDRV_SECTOR_SIZE, strerror(-ret));
2059                 s->ret = ret;
2060             }
2061         }
2062 
2063         if (s->wr_in_order) {
2064             /* reenter the coroutine that might have waited
2065              * for this write to complete */
2066             s->wr_offs = sector_num + n;
2067             for (i = 0; i < s->num_coroutines; i++) {
2068                 if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) {
2069                     /*
2070                      * A -> B -> A cannot occur because A has
2071                      * s->wait_sector_num[i] == -1 during A -> B.  Therefore
2072                      * B will never enter A during this time window.
2073                      */
2074                     qemu_coroutine_enter(s->co[i]);
2075                     break;
2076                 }
2077             }
2078         }
2079     }
2080 
2081     qemu_vfree(buf);
2082     s->co[index] = NULL;
2083     s->running_coroutines--;
2084     if (!s->running_coroutines && s->ret == -EINPROGRESS) {
2085         /* the convert job finished successfully */
2086         s->ret = 0;
2087     }
2088 }
2089 
/*
 * Run the conversion described by @s.
 *
 * A first pass walks all source sectors with convert_iteration_sectors() to
 * add up how many sectors will be written (s->allocated_sectors).  Then
 * s->num_coroutines worker coroutines running convert_co_do_copy() are
 * started and the main loop is driven until all of them have finished.
 *
 * Returns 0 on success and a negative value on failure.
 */
static int convert_do_copy(ImgConvertState *s)
{
    int64_t pos;
    int i;
    bool probe_zero_init;

    /* Check whether we have zero initialisation or can get it efficiently */
    probe_zero_init = !s->has_zero_init && s->target_is_new &&
                      s->min_sparse && !s->target_has_backing;
    if (probe_zero_init) {
        bdrv_graph_rdlock_main_loop();
        s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target));
        bdrv_graph_rdunlock_main_loop();
    }

    /* For compressed images only one cluster can be copied at a time, so
     * the copy buffer is sized to exactly one cluster. */
    if (s->compressed) {
        bool cluster_ok = s->cluster_sectors > 0 &&
                          s->cluster_sectors <= s->buf_sectors;

        if (!cluster_ok) {
            error_report("invalid cluster size");
            return -EINVAL;
        }
        s->buf_sectors = s->cluster_sectors;
    }

    /* First pass: count the sectors that are going to be written */
    for (pos = 0; pos < s->total_sectors; ) {
        int n;

        bdrv_graph_rdlock_main_loop();
        n = convert_iteration_sectors(s, pos);
        bdrv_graph_rdunlock_main_loop();
        if (n < 0) {
            return n;
        }

        if (s->status == BLK_DATA ||
            (!s->min_sparse && s->status == BLK_ZERO)) {
            s->allocated_sectors += n;
        }
        pos += n;
    }

    /* Second pass: do the copy */
    s->sector_next_status = 0;
    s->ret = -EINPROGRESS;

    qemu_co_mutex_init(&s->lock);
    for (i = 0; i < s->num_coroutines; i++) {
        s->co[i] = qemu_coroutine_create(convert_co_do_copy, s);
        s->wait_sector_num[i] = -1;
        qemu_coroutine_enter(s->co[i]);
    }

    /* Each worker decrements running_coroutines when it terminates */
    while (s->running_coroutines) {
        main_loop_wait(false);
    }

    if (!s->ret && s->compressed) {
        /* signal EOF to align */
        int ret = blk_pwrite_compressed(s->target, 0, 0, NULL);

        if (ret < 0) {
            return ret;
        }
    }

    return s->ret;
}
2152 
2153 /* Check that bitmaps can be copied, or output an error */
convert_check_bitmaps(BlockDriverState * src,bool skip_broken)2154 static int convert_check_bitmaps(BlockDriverState *src, bool skip_broken)
2155 {
2156     BdrvDirtyBitmap *bm;
2157 
2158     if (!bdrv_supports_persistent_dirty_bitmap(src)) {
2159         error_report("Source lacks bitmap support");
2160         return -1;
2161     }
2162     FOR_EACH_DIRTY_BITMAP(src, bm) {
2163         if (!bdrv_dirty_bitmap_get_persistence(bm)) {
2164             continue;
2165         }
2166         if (!skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) {
2167             error_report("Cannot copy inconsistent bitmap '%s'",
2168                          bdrv_dirty_bitmap_name(bm));
2169             error_printf("Try --skip-broken-bitmaps, or "
2170                          "use 'qemu-img bitmap --remove' to delete it\n");
2171             return -1;
2172         }
2173     }
2174     return 0;
2175 }
2176 
/*
 * Copy every persistent dirty bitmap of @src to @dst.
 *
 * For each bitmap a bitmap of the same name and granularity is created on
 * @dst and then populated by merging the source bitmap into it.  With
 * @skip_broken set, inconsistent bitmaps are skipped with a warning.
 *
 * Returns 0 on success.  On the first failure the error is reported and -1
 * is returned; a bitmap that was created but could not be populated is
 * removed from @dst again.
 */
static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst,
                                bool skip_broken)
{
    BdrvDirtyBitmap *bitmap;

    FOR_EACH_DIRTY_BITMAP(src, bitmap) {
        Error *local_err = NULL;
        const char *bitmap_name;

        if (!bdrv_dirty_bitmap_get_persistence(bitmap)) {
            continue;
        }

        bitmap_name = bdrv_dirty_bitmap_name(bitmap);
        if (skip_broken && bdrv_dirty_bitmap_inconsistent(bitmap)) {
            warn_report("Skipping inconsistent bitmap '%s'", bitmap_name);
            continue;
        }

        /* Create the bitmap on the destination, carrying over the source's
         * granularity and its enabled/disabled state */
        qmp_block_dirty_bitmap_add(dst->node_name, bitmap_name,
                                   true, bdrv_dirty_bitmap_granularity(bitmap),
                                   true, true,
                                   true, !bdrv_dirty_bitmap_enabled(bitmap),
                                   &local_err);
        if (local_err) {
            error_reportf_err(local_err, "Failed to create bitmap %s: ",
                              bitmap_name);
            return -1;
        }

        /* Fill it with the contents of the source bitmap */
        do_dirty_bitmap_merge(dst->node_name, bitmap_name,
                              src->node_name, bitmap_name, &local_err);
        if (local_err) {
            error_reportf_err(local_err, "Failed to populate bitmap %s: ",
                              bitmap_name);
            /* Do not leave a half-initialised bitmap behind */
            qmp_block_dirty_bitmap_remove(dst->node_name, bitmap_name, NULL);
            return -1;
        }
    }

    return 0;
}
2215 
2216 #define MAX_BUF_SECTORS 32768
2217 
/*
 * Throttle writes to @blk: the block backend is put into
 * CONVERT_THROTTLE_GROUP and the average of the THROTTLE_BPS_WRITE bucket
 * is set to @rate_limit.
 */
static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
{
    ThrottleConfig limits;

    blk_io_limits_enable(blk, CONVERT_THROTTLE_GROUP);

    throttle_config_init(&limits);
    limits.buckets[THROTTLE_BPS_WRITE].avg = rate_limit;
    blk_set_io_limits(blk, &limits);
}
2228 
img_convert(const img_cmd_t * ccmd,int argc,char ** argv)2229 static int img_convert(const img_cmd_t *ccmd, int argc, char **argv)
2230 {
2231     int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
2232     const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
2233                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
2234                *out_filename, *out_baseimg_param, *snapshot_name = NULL,
2235                *backing_fmt = NULL;
2236     BlockDriver *drv = NULL, *proto_drv = NULL;
2237     BlockDriverInfo bdi;
2238     BlockDriverState *out_bs;
2239     QemuOpts *opts = NULL, *sn_opts = NULL;
2240     QemuOptsList *create_opts = NULL;
2241     QDict *open_opts = NULL;
2242     char *options = NULL;
2243     Error *local_err = NULL;
2244     bool writethrough, src_writethrough, image_opts = false,
2245          skip_create = false, progress = false, tgt_image_opts = false;
2246     int64_t ret = -EINVAL;
2247     bool force_share = false;
2248     bool explict_min_sparse = false;
2249     bool bitmaps = false;
2250     bool skip_broken = false;
2251     int64_t rate_limit = 0;
2252 
2253     ImgConvertState s = (ImgConvertState) {
2254         /* Need at least 4k of zeros for sparse detection */
2255         .min_sparse         = 8,
2256         .copy_range         = false,
2257         .buf_sectors        = IO_BUF_SIZE / BDRV_SECTOR_SIZE,
2258         .wr_in_order        = true,
2259         .num_coroutines     = 8,
2260     };
2261 
2262     for(;;) {
2263         static const struct option long_options[] = {
2264             {"help", no_argument, 0, 'h'},
2265             {"source-format", required_argument, 0, 'f'},
2266             /*
2267              * XXX: historic --image-opts acts on source file only,
2268              * it seems better to have it affect both source and target,
2269              * and have separate --source-image-opts for source,
2270              * but this might break existing setups.
2271              */
2272             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
2273             {"source-cache", required_argument, 0, 'T'},
2274             {"snapshot", required_argument, 0, 'l'},
2275             {"bitmaps", no_argument, 0, OPTION_BITMAPS},
2276             {"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN},
2277             {"salvage", no_argument, 0, OPTION_SALVAGE},
2278             {"target-format", required_argument, 0, 'O'},
2279             {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
2280             {"target-format-options", required_argument, 0, 'o'},
2281             {"target-cache", required_argument, 0, 't'},
2282             {"backing", required_argument, 0, 'b'},
2283             {"backing-format", required_argument, 0, 'F'},
2284             {"sparse-size", required_argument, 0, 'S'},
2285             {"no-create", no_argument, 0, 'n'},
2286             {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO},
2287             {"force-share", no_argument, 0, 'U'},
2288             {"rate-limit", required_argument, 0, 'r'},
2289             {"parallel", required_argument, 0, 'm'},
2290             {"oob-writes", no_argument, 0, 'W'},
2291             {"copy-range-offloading", no_argument, 0, 'C'},
2292             {"progress", no_argument, 0, 'p'},
2293             {"quiet", no_argument, 0, 'q'},
2294             {"object", required_argument, 0, OPTION_OBJECT},
2295             {0, 0, 0, 0}
2296         };
2297         c = getopt_long(argc, argv, "hf:O:b:B:CcF:o:l:S:pt:T:nm:WUr:q",
2298                         long_options, NULL);
2299         if (c == -1) {
2300             break;
2301         }
2302         switch (c) {
2303         case 'h':
2304             cmd_help(ccmd, "[-f SRC_FMT | --image-opts] [-T SRC_CACHE]\n"
2305 "        [-l SNAPSHOT] [--bitmaps [--skip-broken-bitmaps]] [--salvage]\n"
2306 "        [-O TGT_FMT | --target-image-opts] [-o TGT_FMT_OPTS] [-t TGT_CACHE]\n"
2307 "        [-b BACKING_FILE [-F BACKING_FMT]] [-S SPARSE_SIZE]\n"
2308 "        [-n] [--target-is-zero] [-c]\n"
2309 "        [-U] [-r RATE] [-m NUM_PARALLEL] [-W] [-C] [-p] [-q] [--object OBJDEF]\n"
2310 "        SRC_FILE [SRC_FILE2...] TGT_FILE\n"
2311 ,
2312 "  -f, --source-format SRC_FMT\n"
2313 "     specify format of all SRC_FILEs explicitly (default: probing is used)\n"
2314 "  --image-opts\n"
2315 "     treat each SRC_FILE as an option string (key=value,...), not a file name\n"
2316 "     (incompatible with -f|--source-format)\n"
2317 "  -T, --source-cache SRC_CACHE\n"
2318 "     source image(s) cache mode (" BDRV_DEFAULT_CACHE ")\n"
2319 "  -l, --snapshot SNAPSHOT\n"
2320 "     specify source snapshot\n"
2321 "  --bitmaps\n"
2322 "     also copy any persistent bitmaps present in source\n"
2323 "  --skip-broken-bitmaps\n"
2324 "     skip (do not error out) any broken bitmaps\n"
2325 "  --salvage\n"
2326 "     ignore errors on input (convert unreadable areas to zeros)\n"
2327 "  -O, --target-format TGT_FMT\n"
2328 "     specify TGT_FILE image format (default: raw)\n"
2329 "  --target-image-opts\n"
2330 "     treat TGT_FILE as an option string (key=value,...), not a file name\n"
2331 "     (incompatible with -O|--target-format)\n"
2332 "  -o, --target-format-options TGT_FMT_OPTS\n"
2333 "     TGT_FMT-specific options\n"
2334 "  -t, --target-cache TGT_CACHE\n"
2335 "     cache mode when opening output image (default: unsafe)\n"
2336 "  -b, --backing BACKING_FILE (was -B in <= 10.0)\n"
2337 "     create target image to be a CoW on top of BACKING_FILE\n"
2338 "  -F, --backing-format BACKING_FMT\n" /* -B used for -b in <=10.0 */
2339 "     specify BACKING_FILE image format explicitly (default: probing is used)\n"
2340 "  -S, --sparse-size SPARSE_SIZE[bkKMGTPE]\n"
2341 "     specify number of consecutive zero bytes to treat as a gap on output\n"
2342 "     (rounded down to nearest 512 bytes), with optional multiplier suffix\n"
2343 "  -n, --no-create\n"
2344 "     omit target volume creation (e.g. on rbd)\n"
2345 "  --target-is-zero\n"
2346 "     indicates that the target volume is pre-zeroed\n"
2347 "  -c, --compress\n"
2348 "     create compressed output image (qcow and qcow2 formats only)\n"
2349 "  -U, --force-share\n"
2350 "     open images in shared mode for concurrent access\n"
2351 "  -r, --rate-limit RATE\n"
2352 "     I/O rate limit, in bytes per second\n"
2353 "  -m, --parallel NUM_PARALLEL\n"
2354 "     specify parallelism (default: 8)\n"
2355 "  -C, --copy-range-offloading\n"
2356 "     try to use copy offloading\n"
2357 "  -W, --oob-writes\n"
2358 "     enable out-of-order writes to improve performance\n"
2359 "  -p, --progress\n"
2360 "     display progress information\n"
2361 "  -q, --quiet\n"
2362 "     quiet mode (produce only error messages if any)\n"
2363 "  --object OBJDEF\n"
2364 "     defines QEMU user-creatable object\n"
2365 "  SRC_FILE...\n"
2366 "     one or more source image file names,\n"
2367 "     or option strings (key=value,..) with --source-image-opts\n"
2368 "  TGT_FILE\n"
2369 "     target (output) image file name,\n"
2370 "     or option string (key=value,..) with --target-image-opts\n"
2371 );
2372             break;
2373         case 'f':
2374             fmt = optarg;
2375             break;
2376         case OPTION_IMAGE_OPTS:
2377             image_opts = true;
2378             break;
2379         case 'T':
2380             src_cache = optarg;
2381             break;
2382         case 'l':
2383             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
2384                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
2385                                                   optarg, false);
2386                 if (!sn_opts) {
2387                     error_report("Failed in parsing snapshot param '%s'",
2388                                  optarg);
2389                     goto fail_getopt;
2390                 }
2391             } else {
2392                 snapshot_name = optarg;
2393             }
2394             break;
2395         case OPTION_BITMAPS:
2396             bitmaps = true;
2397             break;
2398         case OPTION_SKIP_BROKEN:
2399             skip_broken = true;
2400             break;
2401         case OPTION_SALVAGE:
2402             s.salvage = true;
2403             break;
2404          case 'O':
2405             out_fmt = optarg;
2406             break;
2407         case OPTION_TARGET_IMAGE_OPTS:
2408             tgt_image_opts = true;
2409             break;
2410         case 'o':
2411             if (accumulate_options(&options, optarg) < 0) {
2412                 goto fail_getopt;
2413             }
2414             break;
2415         case 't':
2416             cache = optarg;
2417             break;
2418         case 'B': /* <=10.0 */
2419         case 'b':
2420             out_baseimg = optarg;
2421             break;
2422         case 'F': /* can't use -B as it used as -b in <=10.0 */
2423             backing_fmt = optarg;
2424             break;
2425         case 'S':
2426         {
2427             int64_t sval;
2428 
2429             sval = cvtnum("buffer size for sparse output", optarg, true);
2430             if (sval < 0) {
2431                 goto fail_getopt;
2432             } else if (!QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) ||
2433                 sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) {
2434                 error_report("Invalid buffer size for sparse output specified. "
2435                     "Valid sizes are multiples of %llu up to %llu. Select "
2436                     "0 to disable sparse detection (fully allocates output).",
2437                     BDRV_SECTOR_SIZE, MAX_BUF_SECTORS * BDRV_SECTOR_SIZE);
2438                 goto fail_getopt;
2439             }
2440 
2441             s.min_sparse = sval / BDRV_SECTOR_SIZE;
2442             explict_min_sparse = true;
2443             break;
2444         }
2445         case 'n':
2446             skip_create = true;
2447             break;
2448         case OPTION_TARGET_IS_ZERO:
2449             /*
2450              * The user asserting that the target is blank has the
2451              * same effect as the target driver supporting zero
2452              * initialisation.
2453              */
2454             s.has_zero_init = true;
2455             break;
2456         case 'c':
2457             s.compressed = true;
2458             break;
2459         case 'U':
2460             force_share = true;
2461             break;
2462         case 'r':
2463             rate_limit = cvtnum("rate limit", optarg, true);
2464             if (rate_limit < 0) {
2465                 goto fail_getopt;
2466             }
2467             break;
2468         case 'm':
2469             s.num_coroutines = cvtnum_full("number of coroutines", optarg,
2470                                            false, 1, MAX_COROUTINES);
2471             if (s.num_coroutines < 0) {
2472                 goto fail_getopt;
2473             }
2474             break;
2475         case 'W':
2476             s.wr_in_order = false;
2477             break;
2478         case 'C':
2479             s.copy_range = true;
2480             break;
2481         case 'p':
2482             progress = true;
2483             break;
2484         case 'q':
2485             s.quiet = true;
2486             break;
2487         case OPTION_OBJECT:
2488             user_creatable_process_cmdline(optarg);
2489             break;
2490         default:
2491             tryhelp(argv[0]);
2492         }
2493     }
2494 
2495     if (!out_fmt && !tgt_image_opts) {
2496         out_fmt = "raw";
2497     }
2498 
2499     if (skip_broken && !bitmaps) {
2500         error_report("Use of --skip-broken-bitmaps requires --bitmaps");
2501         goto fail_getopt;
2502     }
2503 
2504     if (s.compressed && s.copy_range) {
2505         error_report("Cannot enable copy offloading when -c is used");
2506         goto fail_getopt;
2507     }
2508 
2509     if (explict_min_sparse && s.copy_range) {
2510         error_report("Cannot enable copy offloading when -S is used");
2511         goto fail_getopt;
2512     }
2513 
2514     if (s.copy_range && s.salvage) {
2515         error_report("Cannot use copy offloading in salvaging mode");
2516         goto fail_getopt;
2517     }
2518 
2519     if (tgt_image_opts && !skip_create) {
2520         error_report("--target-image-opts requires use of -n flag");
2521         goto fail_getopt;
2522     }
2523 
2524     if (skip_create && options) {
2525         error_report("-o has no effect when skipping image creation");
2526         goto fail_getopt;
2527     }
2528 
2529     if (s.has_zero_init && !skip_create) {
2530         error_report("--target-is-zero requires use of -n flag");
2531         goto fail_getopt;
2532     }
2533 
2534     s.src_num = argc - optind - 1;
2535     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
2536 
2537     if (options && has_help_option(options)) {
2538         if (out_fmt) {
2539             ret = print_block_option_help(out_filename, out_fmt);
2540             goto fail_getopt;
2541         } else {
2542             error_report("Option help requires a format be specified");
2543             goto fail_getopt;
2544         }
2545     }
2546 
2547     if (s.src_num < 1) {
2548         error_report("Must specify image file name");
2549         goto fail_getopt;
2550     }
2551 
2552     /* ret is still -EINVAL until here */
2553     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
2554     if (ret < 0) {
2555         error_report("Invalid source cache option: %s", src_cache);
2556         goto fail_getopt;
2557     }
2558 
2559     /* Initialize before goto out */
2560     if (s.quiet) {
2561         progress = false;
2562     }
2563     qemu_progress_init(progress, 1.0);
2564     qemu_progress_print(0, 100);
2565 
2566     s.src = g_new0(BlockBackend *, s.src_num);
2567     s.src_sectors = g_new(int64_t, s.src_num);
2568     s.src_alignment = g_new(int, s.src_num);
2569 
2570     for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2571         BlockDriverState *src_bs;
2572         s.src[bs_i] = img_open(image_opts, argv[optind + bs_i],
2573                                fmt, src_flags, src_writethrough, s.quiet,
2574                                force_share);
2575         if (!s.src[bs_i]) {
2576             ret = -1;
2577             goto out;
2578         }
2579         s.src_sectors[bs_i] = blk_nb_sectors(s.src[bs_i]);
2580         if (s.src_sectors[bs_i] < 0) {
2581             error_report("Could not get size of %s: %s",
2582                          argv[optind + bs_i], strerror(-s.src_sectors[bs_i]));
2583             ret = -1;
2584             goto out;
2585         }
2586         src_bs = blk_bs(s.src[bs_i]);
2587         s.src_alignment[bs_i] = DIV_ROUND_UP(src_bs->bl.request_alignment,
2588                                              BDRV_SECTOR_SIZE);
2589         if (!bdrv_get_info(src_bs, &bdi)) {
2590             s.src_alignment[bs_i] = MAX(s.src_alignment[bs_i],
2591                                         bdi.cluster_size / BDRV_SECTOR_SIZE);
2592         }
2593         s.total_sectors += s.src_sectors[bs_i];
2594     }
2595 
2596     if (sn_opts) {
2597         bdrv_snapshot_load_tmp(blk_bs(s.src[0]),
2598                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
2599                                qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
2600                                &local_err);
2601     } else if (snapshot_name != NULL) {
2602         if (s.src_num > 1) {
2603             error_report("No support for concatenating multiple snapshot");
2604             ret = -1;
2605             goto out;
2606         }
2607 
2608         bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(s.src[0]), snapshot_name,
2609                                              &local_err);
2610     }
2611     if (local_err) {
2612         error_reportf_err(local_err, "Failed to load snapshot: ");
2613         ret = -1;
2614         goto out;
2615     }
2616 
2617     if (!skip_create) {
2618         /* Find driver and parse its options */
2619         drv = bdrv_find_format(out_fmt);
2620         if (!drv) {
2621             error_report("Unknown file format '%s'", out_fmt);
2622             ret = -1;
2623             goto out;
2624         }
2625 
2626         proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
2627         if (!proto_drv) {
2628             error_report_err(local_err);
2629             ret = -1;
2630             goto out;
2631         }
2632 
2633         if (!drv->create_opts) {
2634             error_report("Format driver '%s' does not support image creation",
2635                          drv->format_name);
2636             ret = -1;
2637             goto out;
2638         }
2639 
2640         if (!proto_drv->create_opts) {
2641             error_report("Protocol driver '%s' does not support image creation",
2642                          proto_drv->format_name);
2643             ret = -1;
2644             goto out;
2645         }
2646 
2647         create_opts = qemu_opts_append(create_opts, drv->create_opts);
2648         create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
2649 
2650         opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
2651         if (options) {
2652             if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
2653                 error_report_err(local_err);
2654                 ret = -1;
2655                 goto out;
2656             }
2657         }
2658 
2659         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
2660                             s.total_sectors * BDRV_SECTOR_SIZE, &error_abort);
2661         ret = add_old_style_options(out_fmt, opts, out_baseimg, backing_fmt);
2662         if (ret < 0) {
2663             goto out;
2664         }
2665     }
2666 
2667     /* Get backing file name if -o backing_file was used */
2668     out_baseimg_param = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
2669     if (out_baseimg_param) {
2670         out_baseimg = out_baseimg_param;
2671     }
2672     s.target_has_backing = (bool) out_baseimg;
2673 
2674     if (s.has_zero_init && s.target_has_backing) {
2675         error_report("Cannot use --target-is-zero when the destination "
2676                      "image has a backing file");
2677         goto out;
2678     }
2679 
2680     if (s.src_num > 1 && out_baseimg) {
2681         error_report("Having a backing file for the target makes no sense when "
2682                      "concatenating multiple input images");
2683         ret = -1;
2684         goto out;
2685     }
2686 
2687     if (out_baseimg_param) {
2688         if (!qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT)) {
2689             error_report("Use of backing file requires explicit "
2690                          "backing format");
2691             ret = -1;
2692             goto out;
2693         }
2694     }
2695 
2696     /* Check if compression is supported */
2697     if (s.compressed) {
2698         bool encryption =
2699             qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, false);
2700         const char *encryptfmt =
2701             qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT);
2702         const char *preallocation =
2703             qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
2704 
2705         if (drv && !block_driver_can_compress(drv)) {
2706             error_report("Compression not supported for this file format");
2707             ret = -1;
2708             goto out;
2709         }
2710 
2711         if (encryption || encryptfmt) {
2712             error_report("Compression and encryption not supported at "
2713                          "the same time");
2714             ret = -1;
2715             goto out;
2716         }
2717 
2718         if (preallocation
2719             && strcmp(preallocation, "off"))
2720         {
2721             error_report("Compression and preallocation not supported at "
2722                          "the same time");
2723             ret = -1;
2724             goto out;
2725         }
2726     }
2727 
2728     /* Determine if bitmaps need copying */
2729     if (bitmaps) {
2730         if (s.src_num > 1) {
2731             error_report("Copying bitmaps only possible with single source");
2732             ret = -1;
2733             goto out;
2734         }
2735         ret = convert_check_bitmaps(blk_bs(s.src[0]), skip_broken);
2736         if (ret < 0) {
2737             goto out;
2738         }
2739     }
2740 
2741     /*
2742      * The later open call will need any decryption secrets, and
2743      * bdrv_create() will purge "opts", so extract them now before
2744      * they are lost.
2745      */
2746     if (!skip_create) {
2747         open_opts = qdict_new();
2748         qemu_opt_foreach(opts, img_add_key_secrets, open_opts, &error_abort);
2749 
2750         /* Create the new image */
2751         ret = bdrv_create(drv, out_filename, opts, &local_err);
2752         if (ret < 0) {
2753             error_reportf_err(local_err, "%s: error while converting %s: ",
2754                               out_filename, out_fmt);
2755             goto out;
2756         }
2757     }
2758 
2759     s.target_is_new = !skip_create;
2760 
2761     flags = s.min_sparse ? (BDRV_O_RDWR | BDRV_O_UNMAP) : BDRV_O_RDWR;
2762     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
2763     if (ret < 0) {
2764         error_report("Invalid cache option: %s", cache);
2765         goto out;
2766     }
2767 
2768     if (flags & BDRV_O_NOCACHE) {
2769         /*
2770          * If we open the target with O_DIRECT, it may be necessary to
2771          * extend its size to align to the physical sector size.
2772          */
2773         flags |= BDRV_O_RESIZE;
2774     }
2775 
2776     if (skip_create) {
2777         s.target = img_open(tgt_image_opts, out_filename, out_fmt,
2778                             flags, writethrough, s.quiet, false);
2779     } else {
2780         /* TODO ultimately we should allow --target-image-opts
2781          * to be used even when -n is not given.
2782          * That has to wait for bdrv_create to be improved
2783          * to allow filenames in option syntax
2784          */
2785         s.target = img_open_file(out_filename, open_opts, out_fmt,
2786                                  flags, writethrough, s.quiet, false);
2787         open_opts = NULL; /* blk_new_open will have freed it */
2788     }
2789     if (!s.target) {
2790         ret = -1;
2791         goto out;
2792     }
2793     out_bs = blk_bs(s.target);
2794 
2795     if (bitmaps && !bdrv_supports_persistent_dirty_bitmap(out_bs)) {
2796         error_report("Format driver '%s' does not support bitmaps",
2797                      out_bs->drv->format_name);
2798         ret = -1;
2799         goto out;
2800     }
2801 
2802     if (s.compressed && !block_driver_can_compress(out_bs->drv)) {
2803         error_report("Compression not supported for this file format");
2804         ret = -1;
2805         goto out;
2806     }
2807 
2808     /* increase bufsectors from the default 4096 (2M) if opt_transfer
2809      * or discard_alignment of the out_bs is greater. Limit to
2810      * MAX_BUF_SECTORS as maximum which is currently 32768 (16MB). */
2811     s.buf_sectors = MIN(MAX_BUF_SECTORS,
2812                         MAX(s.buf_sectors,
2813                             MAX(out_bs->bl.opt_transfer >> BDRV_SECTOR_BITS,
2814                                 out_bs->bl.pdiscard_alignment >>
2815                                 BDRV_SECTOR_BITS)));
2816 
2817     /* try to align the write requests to the destination to avoid unnecessary
2818      * RMW cycles. */
2819     s.alignment = MAX(pow2floor(s.min_sparse),
2820                       DIV_ROUND_UP(out_bs->bl.request_alignment,
2821                                    BDRV_SECTOR_SIZE));
2822     assert(is_power_of_2(s.alignment));
2823 
2824     if (skip_create) {
2825         int64_t output_sectors = blk_nb_sectors(s.target);
2826         if (output_sectors < 0) {
2827             error_report("unable to get output image length: %s",
2828                          strerror(-output_sectors));
2829             ret = -1;
2830             goto out;
2831         } else if (output_sectors < s.total_sectors) {
2832             error_report("output file is smaller than input file");
2833             ret = -1;
2834             goto out;
2835         }
2836     }
2837 
2838     if (s.target_has_backing && s.target_is_new) {
2839         /* Errors are treated as "backing length unknown" (which means
2840          * s.target_backing_sectors has to be negative, which it will
2841          * be automatically).  The backing file length is used only
2842          * for optimizations, so such a case is not fatal. */
2843         bdrv_graph_rdlock_main_loop();
2844         s.target_backing_sectors =
2845             bdrv_nb_sectors(bdrv_backing_chain_next(out_bs));
2846         bdrv_graph_rdunlock_main_loop();
2847     } else {
2848         s.target_backing_sectors = -1;
2849     }
2850 
2851     ret = bdrv_get_info(out_bs, &bdi);
2852     if (ret < 0) {
2853         if (s.compressed) {
2854             error_report("could not get block driver info");
2855             goto out;
2856         }
2857     } else {
2858         s.compressed = s.compressed || bdi.needs_compressed_writes;
2859         s.cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
2860     }
2861 
2862     if (rate_limit) {
2863         set_rate_limit(s.target, rate_limit);
2864     }
2865 
2866     ret = convert_do_copy(&s);
2867 
2868     /* Now copy the bitmaps */
2869     if (bitmaps && ret == 0) {
2870         ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs, skip_broken);
2871     }
2872 
2873 out:
2874     if (!ret) {
2875         qemu_progress_print(100, 0);
2876     }
2877     qemu_progress_end();
2878     qemu_opts_del(opts);
2879     qemu_opts_free(create_opts);
2880     qobject_unref(open_opts);
2881     blk_unref(s.target);
2882     if (s.src) {
2883         for (bs_i = 0; bs_i < s.src_num; bs_i++) {
2884             blk_unref(s.src[bs_i]);
2885         }
2886         g_free(s.src);
2887     }
2888     g_free(s.src_sectors);
2889     g_free(s.src_alignment);
2890 fail_getopt:
2891     qemu_opts_del(sn_opts);
2892     g_free(options);
2893 
2894     return !!ret;
2895 }
2896 
2897 
/*
 * Print the internal snapshots of @bs to stdout, one per line, preceded
 * by a "Snapshot list:" banner.
 *
 * Nothing is printed when bdrv_snapshot_list() returns zero or a negative
 * value.
 */
static void dump_snapshots(BlockDriverState *bs)
{
    QEMUSnapshotInfo *snapshots;
    int count = bdrv_snapshot_list(bs, &snapshots);
    int idx;

    if (count <= 0) {
        return;
    }

    printf("Snapshot list:\n");
    /* NOTE(review): a NULL entry presumably prints the column header */
    bdrv_snapshot_dump(NULL);
    printf("\n");

    for (idx = 0; idx < count; idx++) {
        bdrv_snapshot_dump(&snapshots[idx]);
        printf("\n");
    }

    g_free(snapshots);
}
2916 
/*
 * Serialize @list through a QObject output visitor and print the result
 * to stdout as pretty-printed JSON followed by a newline.
 */
static void dump_json_block_graph_info_list(BlockGraphInfoList *list)
{
    QObject *root;
    Visitor *visitor = qobject_output_visitor_new(&root);
    GString *json;

    /* Any visitor error is fatal: &error_abort is passed */
    visit_type_BlockGraphInfoList(visitor, NULL, &list, &error_abort);
    visit_complete(visitor, &root);

    json = qobject_to_json_pretty(root, true);
    assert(json != NULL);
    printf("%s\n", json->str);

    qobject_unref(root);
    visit_free(visitor);
    g_string_free(json, true);
}
2932 
/*
 * Serialize a single @info object through a QObject output visitor and
 * print the result to stdout as pretty-printed JSON followed by a newline.
 */
static void dump_json_block_graph_info(BlockGraphInfo *info)
{
    QObject *root;
    Visitor *visitor = qobject_output_visitor_new(&root);
    GString *json;

    /* Any visitor error is fatal: &error_abort is passed */
    visit_type_BlockGraphInfo(visitor, NULL, &info, &error_abort);
    visit_complete(visitor, &root);

    json = qobject_to_json_pretty(root, true);
    assert(json != NULL);
    printf("%s\n", json->str);

    qobject_unref(root);
    visit_free(visitor);
    g_string_free(json, true);
}
2948 
/*
 * Print @info in human-readable form, then recurse into each of its
 * children.
 *
 * @indentation: nesting level; child headings are indented by four
 *               columns per level
 * @path: prefix prepended to each child's name in its heading; the
 *        recursive call receives "<path><child name>/"
 */
static void dump_human_image_info(BlockGraphInfo *info, int indentation,
                                  const char *path)
{
    BlockChildInfoList *it = info->children;
    bool no_children = !it;

    bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation,
                        no_children);

    while (it) {
        BlockChildInfo *child = it->value;
        g_autofree char *sub_path = g_strdup_printf("%s%s/", path,
                                                    child->name);

        printf("%*sChild node '%s%s':\n",
               indentation * 4, "", path, child->name);
        dump_human_image_info(child->info, indentation + 1, sub_path);

        it = it->next;
    }
}
2969 
/*
 * Print every element of @list in human-readable form, separating
 * consecutive elements with one blank line.  Each element is dumped at
 * indentation level 0 with "/" as its path prefix.
 */
static void dump_human_image_info_list(BlockGraphInfoList *list)
{
    BlockGraphInfoList *node;

    for (node = list; node != NULL; node = node->next) {
        /* Every element but the first is preceded by a separator line */
        if (node != list) {
            printf("\n");
        }

        dump_human_image_info(node->value, 0, "/");
    }
}
2984 
str_equal_func(gconstpointer a,gconstpointer b)2985 static gboolean str_equal_func(gconstpointer a, gconstpointer b)
2986 {
2987     return strcmp(a, b) == 0;
2988 }
2989 
2990 /**
2991  * Open an image file chain and return an BlockGraphInfoList
2992  *
2993  * @filename: topmost image filename
2994  * @fmt: topmost image format (may be NULL to autodetect)
2995  * @chain: true  - enumerate entire backing file chain
2996  *         false - only topmost image file
2997  *
2998  * Returns a list of BlockNodeInfo objects or NULL if there was an error
2999  * opening an image file.  If there was an error a message will have been
3000  * printed to stderr.
3001  */
collect_image_info_list(bool image_opts,const char * filename,const char * fmt,bool chain,bool force_share)3002 static BlockGraphInfoList *collect_image_info_list(bool image_opts,
3003                                                    const char *filename,
3004                                                    const char *fmt,
3005                                                    bool chain, bool force_share)
3006 {
3007     BlockGraphInfoList *head = NULL;
3008     BlockGraphInfoList **tail = &head;
3009     GHashTable *filenames;
3010     Error *err = NULL;
3011 
3012     filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
3013 
3014     while (filename) {
3015         BlockBackend *blk;
3016         BlockDriverState *bs;
3017         BlockGraphInfo *info;
3018 
3019         if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
3020             error_report("Backing file '%s' creates an infinite loop.",
3021                          filename);
3022             goto err;
3023         }
3024         g_hash_table_insert(filenames, (gpointer)filename, NULL);
3025 
3026         blk = img_open(image_opts, filename, fmt,
3027                        BDRV_O_NO_BACKING | BDRV_O_NO_IO, false, false,
3028                        force_share);
3029         if (!blk) {
3030             goto err;
3031         }
3032         bs = blk_bs(blk);
3033 
3034         /*
3035          * Note that the returned BlockGraphInfo object will not have
3036          * information about this image's backing node, because we have opened
3037          * it with BDRV_O_NO_BACKING.  Printing this object will therefore not
3038          * duplicate the backing chain information that we obtain by walking
3039          * the chain manually here.
3040          */
3041         bdrv_graph_rdlock_main_loop();
3042         bdrv_query_block_graph_info(bs, &info, &err);
3043         bdrv_graph_rdunlock_main_loop();
3044 
3045         if (err) {
3046             error_report_err(err);
3047             blk_unref(blk);
3048             goto err;
3049         }
3050 
3051         QAPI_LIST_APPEND(tail, info);
3052 
3053         blk_unref(blk);
3054 
3055         /* Clear parameters that only apply to the topmost image */
3056         filename = fmt = NULL;
3057         image_opts = false;
3058 
3059         if (chain) {
3060             if (info->full_backing_filename) {
3061                 filename = info->full_backing_filename;
3062             } else if (info->backing_filename) {
3063                 error_report("Could not determine absolute backing filename,"
3064                              " but backing filename '%s' present",
3065                              info->backing_filename);
3066                 goto err;
3067             }
3068             if (info->backing_filename_format) {
3069                 fmt = info->backing_filename_format;
3070             }
3071         }
3072     }
3073     g_hash_table_destroy(filenames);
3074     return head;
3075 
3076 err:
3077     qapi_free_BlockGraphInfoList(head);
3078     g_hash_table_destroy(filenames);
3079     return NULL;
3080 }
3081 
img_info(const img_cmd_t * ccmd,int argc,char ** argv)3082 static int img_info(const img_cmd_t *ccmd, int argc, char **argv)
3083 {
3084     int c;
3085     OutputFormat output_format = OFORMAT_HUMAN;
3086     bool chain = false;
3087     const char *filename, *fmt;
3088     BlockGraphInfoList *list;
3089     bool image_opts = false;
3090     bool force_share = false;
3091 
3092     fmt = NULL;
3093     for(;;) {
3094         static const struct option long_options[] = {
3095             {"help", no_argument, 0, 'h'},
3096             {"format", required_argument, 0, 'f'},
3097             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3098             {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
3099             {"force-share", no_argument, 0, 'U'},
3100             {"output", required_argument, 0, OPTION_OUTPUT},
3101             {"object", required_argument, 0, OPTION_OBJECT},
3102             {0, 0, 0, 0}
3103         };
3104         c = getopt_long(argc, argv, "hf:U", long_options, NULL);
3105         if (c == -1) {
3106             break;
3107         }
3108         switch(c) {
3109         case 'h':
3110             cmd_help(ccmd, "[-f FMT | --image-opts] [--backing-chain] [-U]\n"
3111 "        [--output human|json] [--object OBJDEF] FILE\n"
3112 ,
3113 "  -f, --format FMT\n"
3114 "     specify FILE image format explicitly (default: probing is used)\n"
3115 "  --image-opts\n"
3116 "     treat FILE as an option string (key=value,..), not a file name\n"
3117 "     (incompatible with -f|--format)\n"
3118 "  --backing-chain\n"
3119 "     display information about the backing chain for copy-on-write overlays\n"
3120 "  -U, --force-share\n"
3121 "     open image in shared mode for concurrent access\n"
3122 "  --output human|json\n"
3123 "     specify output format (default: human)\n"
3124 "  --object OBJDEF\n"
3125 "     defines QEMU user-creatable object\n"
3126 "  FILE\n"
3127 "     name of the image file, or option string (key=value,..)\n"
3128 "     with --image-opts, to operate on\n"
3129 );
3130             break;
3131         case 'f':
3132             fmt = optarg;
3133             break;
3134         case OPTION_IMAGE_OPTS:
3135             image_opts = true;
3136             break;
3137         case OPTION_BACKING_CHAIN:
3138             chain = true;
3139             break;
3140         case 'U':
3141             force_share = true;
3142             break;
3143         case OPTION_OUTPUT:
3144             output_format = parse_output_format(argv[0], optarg);
3145             break;
3146         case OPTION_OBJECT:
3147             user_creatable_process_cmdline(optarg);
3148             break;
3149         default:
3150             tryhelp(argv[0]);
3151         }
3152     }
3153     if (optind != argc - 1) {
3154         error_exit(argv[0], "Expecting one image file name");
3155     }
3156     filename = argv[optind++];
3157 
3158     list = collect_image_info_list(image_opts, filename, fmt, chain,
3159                                    force_share);
3160     if (!list) {
3161         return 1;
3162     }
3163 
3164     switch (output_format) {
3165     case OFORMAT_HUMAN:
3166         dump_human_image_info_list(list);
3167         break;
3168     case OFORMAT_JSON:
3169         if (chain) {
3170             dump_json_block_graph_info_list(list);
3171         } else {
3172             dump_json_block_graph_info(list->value);
3173         }
3174         break;
3175     }
3176 
3177     qapi_free_BlockGraphInfoList(list);
3178     return 0;
3179 }
3180 
/*
 * Print one map extent @e in the requested @output_format.
 *
 * @next: the extent that follows @e, or NULL if @e is the last one.  In
 *        JSON mode a non-NULL @next makes a "," separator follow the
 *        object.  In human mode @next's flags may be rewritten (see below).
 *
 * Returns 0 on success, or -1 (after reporting an error) when human
 * output is requested for a data extent that has no offset.
 */
static int dump_map_entry(OutputFormat output_format, MapEntry *e,
                          MapEntry *next)
{
    if (output_format == OFORMAT_HUMAN) {
        if (e->data) {
            if (!e->has_offset) {
                error_report("File contains external, encrypted or compressed clusters.");
                return -1;
            }
            /* Only extents that hold data and are not zero get a row */
            if (!e->zero) {
                printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
                       e->start, e->length,
                       e->has_offset ? e->offset : 0,
                       e->filename ? e->filename : "");
            }
        }

        /*
         * The human format does not distinguish between 0, ZERO and
         * ZERO|DATA, so normalise the flags of the following entry to
         * let more extents coalesce.
         */
        if (next != NULL && (!next->data || next->zero)) {
            next->data = false;
            next->zero = true;
        }
    } else if (output_format == OFORMAT_JSON) {
        const char *present = e->present ? "true" : "false";
        const char *zero = e->zero ? "true" : "false";
        const char *data = e->data ? "true" : "false";
        const char *compressed = e->compressed ? "true" : "false";

        printf("{ \"start\": %"PRId64", \"length\": %"PRId64","
               " \"depth\": %"PRId64", \"present\": %s, \"zero\": %s,"
               " \"data\": %s, \"compressed\": %s",
               e->start, e->length, e->depth,
               present, zero, data, compressed);
        if (e->has_offset) {
            printf(", \"offset\": %"PRId64"", e->offset);
        }
        putchar('}');

        if (next != NULL) {
            puts(",");
        }
    }

    return 0;
}
3225 
/*
 * Query the block status of up to @bytes bytes at @offset, starting at
 * @bs and descending through bdrv_cow_bs() until a layer reports the
 * range as DATA or ZERO, or until there is no further layer.
 *
 * On success *@e is filled in: its length is the byte count reported by
 * the last bdrv_block_status() call and its depth is the number of
 * layers that were descended.
 *
 * Returns 0 on success, or the negative value returned by
 * bdrv_block_status() on failure (*@e is left untouched in that case).
 */
static int get_block_status(BlockDriverState *bs, int64_t offset,
                            int64_t bytes, MapEntry *e)
{
    int status;
    int layer;
    BlockDriverState *file;
    bool offset_valid;
    int64_t map;
    char *name = NULL;

    GLOBAL_STATE_CODE();
    GRAPH_RDLOCK_GUARD_MAINLOOP();

    /* As an optimization, we could cache the current range of unallocated
     * clusters in each file of the chain, and avoid querying the same
     * range repeatedly.
     */

    for (layer = 0; ; layer++) {
        bs = bdrv_skip_filters(bs);
        status = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
        if (status < 0) {
            return status;
        }
        assert(bytes);
        if (status & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
            break;
        }

        bs = bdrv_cow_bs(bs);
        if (!bs) {
            /* Bottom of the chain reached: report an entry with no flags */
            status = 0;
            break;
        }
    }

    offset_valid = (status & BDRV_BLOCK_OFFSET_VALID) != 0;

    if (offset_valid && file) {
        bdrv_refresh_filename(file);
        name = file->filename;
    }

    *e = (MapEntry) {
        .start = offset,
        .length = bytes,
        .depth = layer,
        .present = (status & BDRV_BLOCK_ALLOCATED) != 0,
        .zero = (status & BDRV_BLOCK_ZERO) != 0,
        .data = (status & BDRV_BLOCK_DATA) != 0,
        .compressed = (status & BDRV_BLOCK_COMPRESSED) != 0,
        .has_offset = offset_valid,
        .offset = map,
        .filename = name,
    };

    return 0;
}
3286 
entry_mergeable(const MapEntry * curr,const MapEntry * next)3287 static inline bool entry_mergeable(const MapEntry *curr, const MapEntry *next)
3288 {
3289     if (curr->length == 0) {
3290         return false;
3291     }
3292     if (curr->zero != next->zero ||
3293         curr->data != next->data ||
3294         curr->compressed != next->compressed ||
3295         curr->depth != next->depth ||
3296         curr->present != next->present ||
3297         !curr->filename != !next->filename ||
3298         curr->has_offset != next->has_offset) {
3299         return false;
3300     }
3301     if (curr->filename && strcmp(curr->filename, next->filename)) {
3302         return false;
3303     }
3304     if (curr->has_offset && curr->offset + curr->length != next->offset) {
3305         return false;
3306     }
3307     return true;
3308 }
3309 
img_map(const img_cmd_t * ccmd,int argc,char ** argv)3310 static int img_map(const img_cmd_t *ccmd, int argc, char **argv)
3311 {
3312     int c;
3313     OutputFormat output_format = OFORMAT_HUMAN;
3314     BlockBackend *blk;
3315     BlockDriverState *bs;
3316     const char *filename, *fmt;
3317     int64_t length;
3318     MapEntry curr = { .length = 0 }, next;
3319     int ret = 0;
3320     bool image_opts = false;
3321     bool force_share = false;
3322     int64_t start_offset = 0;
3323     int64_t max_length = -1;
3324 
3325     fmt = NULL;
3326     for (;;) {
3327         static const struct option long_options[] = {
3328             {"help", no_argument, 0, 'h'},
3329             {"format", required_argument, 0, 'f'},
3330             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3331             {"start-offset", required_argument, 0, 's'},
3332             {"max-length", required_argument, 0, 'l'},
3333             {"force-share", no_argument, 0, 'U'},
3334             {"output", required_argument, 0, OPTION_OUTPUT},
3335             {"object", required_argument, 0, OPTION_OBJECT},
3336             {0, 0, 0, 0}
3337         };
3338         c = getopt_long(argc, argv, "hf:s:l:U",
3339                         long_options, NULL);
3340         if (c == -1) {
3341             break;
3342         }
3343         switch (c) {
3344         case 'h':
3345             cmd_help(ccmd, "[-f FMT | --image-opts]\n"
3346 "        [--start-offset OFFSET] [--max-length LENGTH]\n"
3347 "        [--output human|json] [-U] [--object OBJDEF] FILE\n"
3348 ,
3349 "  -f, --format FMT\n"
3350 "     specify FILE image format explicitly (default: probing is used)\n"
3351 "  --image-opts\n"
3352 "     treat FILE as an option string (key=value,..), not a file name\n"
3353 "     (incompatible with -f|--format)\n"
3354 "  -s, --start-offset OFFSET\n"
3355 "     start at the given OFFSET in the image, not at the beginning\n"
3356 "  -l, --max-length LENGTH\n"
3357 "     process at most LENGTH bytes instead of up to the end of the image\n"
3358 "  --output human|json\n"
3359 "     specify output format name (default: human)\n"
3360 "  -U, --force-share\n"
3361 "     open image in shared mode for concurrent access\n"
3362 "  --object OBJDEF\n"
3363 "     defines QEMU user-creatable object\n"
3364 "  FILE\n"
3365 "     the image file name, or option string (key=value,..)\n"
3366 "     with --image-opts, to operate on\n"
3367 );
3368             break;
3369         case 'f':
3370             fmt = optarg;
3371             break;
3372         case OPTION_IMAGE_OPTS:
3373             image_opts = true;
3374             break;
3375         case 's':
3376             start_offset = cvtnum("start offset", optarg, true);
3377             if (start_offset < 0) {
3378                 return 1;
3379             }
3380             break;
3381         case 'l':
3382             max_length = cvtnum("max length", optarg, true);
3383             if (max_length < 0) {
3384                 return 1;
3385             }
3386             break;
3387         case OPTION_OUTPUT:
3388             output_format = parse_output_format(argv[0], optarg);
3389             break;
3390         case 'U':
3391             force_share = true;
3392             break;
3393         case OPTION_OBJECT:
3394             user_creatable_process_cmdline(optarg);
3395             break;
3396         default:
3397             tryhelp(argv[0]);
3398         }
3399     }
3400     if (optind != argc - 1) {
3401         error_exit(argv[0], "Expecting one image file name");
3402     }
3403     filename = argv[optind];
3404 
3405     blk = img_open(image_opts, filename, fmt, 0, false, false, force_share);
3406     if (!blk) {
3407         return 1;
3408     }
3409     bs = blk_bs(blk);
3410 
3411     if (output_format == OFORMAT_HUMAN) {
3412         printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
3413     } else if (output_format == OFORMAT_JSON) {
3414         putchar('[');
3415     }
3416 
3417     length = blk_getlength(blk);
3418     if (length < 0) {
3419         error_report("Failed to get size for '%s'", filename);
3420         return 1;
3421     }
3422     if (max_length != -1) {
3423         length = MIN(start_offset + max_length, length);
3424     }
3425 
3426     curr.start = start_offset;
3427     while (curr.start + curr.length < length) {
3428         int64_t offset = curr.start + curr.length;
3429         int64_t n = length - offset;
3430 
3431         ret = get_block_status(bs, offset, n, &next);
3432         if (ret < 0) {
3433             error_report("Could not read file metadata: %s", strerror(-ret));
3434             goto out;
3435         }
3436 
3437         if (entry_mergeable(&curr, &next)) {
3438             curr.length += next.length;
3439             continue;
3440         }
3441 
3442         if (curr.length > 0) {
3443             ret = dump_map_entry(output_format, &curr, &next);
3444             if (ret < 0) {
3445                 goto out;
3446             }
3447         }
3448         curr = next;
3449     }
3450 
3451     ret = dump_map_entry(output_format, &curr, NULL);
3452     if (output_format == OFORMAT_JSON) {
3453         puts("]");
3454     }
3455 
3456 out:
3457     blk_unref(blk);
3458     return ret < 0;
3459 }
3460 
/*
 * Snapshot actions.  Each code is the short option character that
 * selects the action on the command line.
 */
#define SNAPSHOT_LIST 'l'
#define SNAPSHOT_CREATE 'c'
#define SNAPSHOT_APPLY 'a'
#define SNAPSHOT_DELETE 'd'
3466 
img_snapshot(const img_cmd_t * ccmd,int argc,char ** argv)3467 static int img_snapshot(const img_cmd_t *ccmd, int argc, char **argv)
3468 {
3469     BlockBackend *blk;
3470     BlockDriverState *bs;
3471     QEMUSnapshotInfo sn;
3472     char *filename, *fmt = NULL, *snapshot_name = NULL;
3473     int c, ret = 0;
3474     int action = 0;
3475     bool quiet = false;
3476     Error *err = NULL;
3477     bool image_opts = false;
3478     bool force_share = false;
3479     int64_t rt;
3480 
3481     /* Parse commandline parameters */
3482     for(;;) {
3483         static const struct option long_options[] = {
3484             {"help", no_argument, 0, 'h'},
3485             {"format", required_argument, 0, 'f'},
3486             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3487             {"list", no_argument, 0, SNAPSHOT_LIST},
3488             {"apply", required_argument, 0, SNAPSHOT_APPLY},
3489             {"create", required_argument, 0, SNAPSHOT_CREATE},
3490             {"delete", required_argument, 0, SNAPSHOT_DELETE},
3491             {"force-share", no_argument, 0, 'U'},
3492             {"quiet", no_argument, 0, 'q'},
3493             {"object", required_argument, 0, OPTION_OBJECT},
3494             {0, 0, 0, 0}
3495         };
3496         c = getopt_long(argc, argv, "hf:la:c:d:Uq",
3497                         long_options, NULL);
3498         if (c == -1) {
3499             break;
3500         }
3501         switch(c) {
3502         case 'h':
3503             cmd_help(ccmd, "[-f FMT | --image-opts] [-l | -a|-c|-d SNAPSHOT]\n"
3504 "        [-U] [-q] [--object OBJDEF] FILE\n"
3505 ,
3506 "  -f, --format FMT\n"
3507 "     specify FILE format explicitly (default: probing is used)\n"
3508 "  --image-opts\n"
3509 "     treat FILE as an option string (key=value,..), not a file name\n"
3510 "     (incompatible with -f|--format)\n"
3511 "  -l, --list\n"
3512 "     list snapshots in FILE (default action if no -l|-c|-a|-d is given)\n"
3513 "  -c, --create SNAPSHOT\n"
3514 "     create named snapshot\n"
3515 "  -a, --apply SNAPSHOT\n"
3516 "     apply named snapshot to the base\n"
3517 "  -d, --delete SNAPSHOT\n"
3518 "     delete named snapshot\n"
3519 "  (only one of -l|-c|-a|-d can be specified)\n"
3520 "  -U, --force-share\n"
3521 "     open image in shared mode for concurrent access\n"
3522 "  -q, --quiet\n"
3523 "     quiet mode (produce only error messages if any)\n"
3524 "  --object OBJDEF\n"
3525 "     defines QEMU user-creatable object\n"
3526 "  FILE\n"
3527 "     name of the image file, or option string (key=value,..)\n"
3528 "     with --image-opts) to operate on\n"
3529 );
3530             break;
3531         case 'f':
3532             fmt = optarg;
3533             break;
3534         case OPTION_IMAGE_OPTS:
3535             image_opts = true;
3536             break;
3537         case SNAPSHOT_LIST:
3538         case SNAPSHOT_APPLY:
3539         case SNAPSHOT_CREATE:
3540         case SNAPSHOT_DELETE:
3541             if (action) {
3542                 error_exit(argv[0], "Cannot mix '-l', '-a', '-c', '-d'");
3543                 return 0;
3544             }
3545             action = c;
3546             snapshot_name = optarg;
3547             break;
3548         case 'U':
3549             force_share = true;
3550             break;
3551         case 'q':
3552             quiet = true;
3553             break;
3554         case OPTION_OBJECT:
3555             user_creatable_process_cmdline(optarg);
3556             break;
3557         default:
3558             tryhelp(argv[0]);
3559         }
3560     }
3561 
3562     if (optind != argc - 1) {
3563         error_exit(argv[0], "Expecting one image file name");
3564     }
3565     filename = argv[optind++];
3566 
3567     if (!action) {
3568         action = SNAPSHOT_LIST;
3569     }
3570 
3571     /* Open the image */
3572     blk = img_open(image_opts, filename, fmt,
3573                    action == SNAPSHOT_LIST ? 0 : BDRV_O_RDWR,
3574                    false, quiet, force_share);
3575     if (!blk) {
3576         return 1;
3577     }
3578     bs = blk_bs(blk);
3579 
3580     /* Perform the requested action */
3581     switch(action) {
3582     case SNAPSHOT_LIST:
3583         dump_snapshots(bs);
3584         break;
3585 
3586     case SNAPSHOT_CREATE:
3587         memset(&sn, 0, sizeof(sn));
3588         pstrcpy(sn.name, sizeof(sn.name), snapshot_name);
3589 
3590         rt = g_get_real_time();
3591         sn.date_sec = rt / G_USEC_PER_SEC;
3592         sn.date_nsec = (rt % G_USEC_PER_SEC) * 1000;
3593 
3594         bdrv_graph_rdlock_main_loop();
3595         ret = bdrv_snapshot_create(bs, &sn);
3596         bdrv_graph_rdunlock_main_loop();
3597 
3598         if (ret) {
3599             error_report("Could not create snapshot '%s': %s",
3600                 snapshot_name, strerror(-ret));
3601         }
3602         break;
3603 
3604     case SNAPSHOT_APPLY:
3605         ret = bdrv_snapshot_goto(bs, snapshot_name, &err);
3606         if (ret) {
3607             error_reportf_err(err, "Could not apply snapshot '%s': ",
3608                               snapshot_name);
3609         }
3610         break;
3611 
3612     case SNAPSHOT_DELETE:
3613         bdrv_drain_all_begin();
3614         bdrv_graph_rdlock_main_loop();
3615         ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
3616         if (ret < 0) {
3617             error_report("Could not delete snapshot '%s': snapshot not "
3618                          "found", snapshot_name);
3619             ret = 1;
3620         } else {
3621             ret = bdrv_snapshot_delete(bs, sn.id_str, sn.name, &err);
3622             if (ret < 0) {
3623                 error_reportf_err(err, "Could not delete snapshot '%s': ",
3624                                   snapshot_name);
3625                 ret = 1;
3626             }
3627         }
3628         bdrv_graph_rdunlock_main_loop();
3629         bdrv_drain_all_end();
3630         break;
3631     }
3632 
3633     /* Cleanup */
3634     blk_unref(blk);
3635     if (ret) {
3636         return 1;
3637     }
3638     return 0;
3639 }
3640 
img_rebase(const img_cmd_t * ccmd,int argc,char ** argv)3641 static int img_rebase(const img_cmd_t *ccmd, int argc, char **argv)
3642 {
3643     BlockBackend *blk = NULL, *blk_old_backing = NULL, *blk_new_backing = NULL;
3644     uint8_t *buf_old = NULL;
3645     uint8_t *buf_new = NULL;
3646     BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
3647     BlockDriverState *unfiltered_bs, *unfiltered_bs_cow;
3648     BlockDriverInfo bdi = {0};
3649     char *filename;
3650     const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
3651     int c, flags, src_flags, ret;
3652     BdrvRequestFlags write_flags = 0;
3653     bool writethrough, src_writethrough;
3654     int unsafe = 0;
3655     bool force_share = false;
3656     int progress = 0;
3657     bool quiet = false;
3658     bool compress = false;
3659     Error *local_err = NULL;
3660     bool image_opts = false;
3661     int64_t write_align;
3662 
3663     /* Parse commandline parameters */
3664     fmt = NULL;
3665     cache = BDRV_DEFAULT_CACHE;
3666     src_cache = BDRV_DEFAULT_CACHE;
3667     out_baseimg = NULL;
3668     out_basefmt = NULL;
3669     for(;;) {
3670         static const struct option long_options[] = {
3671             {"help", no_argument, 0, 'h'},
3672             {"format", required_argument, 0, 'f'},
3673             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
3674             {"cache", required_argument, 0, 't'},
3675             {"compress", no_argument, 0, 'c'},
3676             {"backing", required_argument, 0, 'b'},
3677             {"backing-format", required_argument, 0, 'B'},
3678             {"backing-cache", required_argument, 0, 'T'},
3679             {"backing-unsafe", no_argument, 0, 'u'},
3680             {"force-share", no_argument, 0, 'U'},
3681             {"progress", no_argument, 0, 'p'},
3682             {"quiet", no_argument, 0, 'q'},
3683             {"object", required_argument, 0, OPTION_OBJECT},
3684             {0, 0, 0, 0}
3685         };
3686         c = getopt_long(argc, argv, "hf:t:cb:F:B:T:uUpq",
3687                         long_options, NULL);
3688         if (c == -1) {
3689             break;
3690         }
3691         switch (c) {
3692         case 'h':
3693             cmd_help(ccmd, "[-f FMT | --image-opts] [-t CACHE]\n"
3694 "        [-b BACKING_FILE [-B BACKING_FMT] [-T BACKING_CACHE]] [-u]\n"
3695 "        [-c] [-U] [-p] [-q] [--object OBJDEF] FILE\n"
3696 ,
3697 "  -f, --format FMT\n"
3698 "     specify FILE format explicitly (default: probing is used)\n"
3699 "  --image-opts\n"
3700 "     treat FILE as an option string (key=value,..), not a file name\n"
3701 "     (incompatible with -f|--format)\n"
3702 "  -t, --cache CACHE\n"
3703 "     cache mode for FILE (default: " BDRV_DEFAULT_CACHE ")\n"
3704 "  -b, --backing BACKING_FILE|\"\"\n"
3705 "     rebase onto this file (specify empty name for no backing file)\n"
3706 "  -B, --backing-format BACKING_FMT (was -F in <=10.0)\n"
3707 "     specify format for BACKING_FILE explicitly (default: probing is used)\n"
3708 "  -T, --backing-cache CACHE\n"
3709 "     BACKING_FILE cache mode (default: " BDRV_DEFAULT_CACHE ")\n"
3710 "  -u, --backing-unsafe\n"
3711 "     do not fail if BACKING_FILE can not be read\n"
3712 "  -c, --compress\n"
3713 "     compress image (when image supports this)\n"
3714 "  -U, --force-share\n"
3715 "     open image in shared mode for concurrent access\n"
3716 "  -p, --progress\n"
3717 "     display progress information\n"
3718 "  -q, --quiet\n"
3719 "     quiet mode (produce only error messages if any)\n"
3720 "  --object OBJDEF\n"
3721 "     defines QEMU user-creatable object\n"
3722 "  FILE\n"
3723 "     name of the image file, or option string (key=value,..)\n"
3724 "     with --image-opts, to operate on\n"
3725 );
3726             return 0;
3727         case 'f':
3728             fmt = optarg;
3729             break;
3730         case OPTION_IMAGE_OPTS:
3731             image_opts = true;
3732             break;
3733         case 't':
3734             cache = optarg;
3735             break;
3736         case 'b':
3737             out_baseimg = optarg;
3738             break;
3739         case 'F': /* <=10.0 */
3740         case 'B':
3741             out_basefmt = optarg;
3742             break;
3743         case 'u':
3744             unsafe = 1;
3745             break;
3746         case 'c':
3747             compress = true;
3748             break;
3749         case 'U':
3750             force_share = true;
3751             break;
3752         case 'p':
3753             progress = 1;
3754             break;
3755         case 'T':
3756             src_cache = optarg;
3757             break;
3758         case 'q':
3759             quiet = true;
3760             break;
3761         case OPTION_OBJECT:
3762             user_creatable_process_cmdline(optarg);
3763             break;
3764         default:
3765             tryhelp(argv[0]);
3766         }
3767     }
3768 
3769     if (quiet) {
3770         progress = 0;
3771     }
3772 
3773     if (optind != argc - 1) {
3774         error_exit(argv[0], "Expecting one image file name");
3775     }
3776     if (!unsafe && !out_baseimg) {
3777         error_exit(argv[0],
3778                    "Must specify backing file (-b) or use unsafe mode (-u)");
3779     }
3780     filename = argv[optind++];
3781 
3782     qemu_progress_init(progress, 2.0);
3783     qemu_progress_print(0, 100);
3784 
3785     flags = BDRV_O_RDWR | (unsafe ? BDRV_O_NO_BACKING : 0);
3786     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
3787     if (ret < 0) {
3788         error_report("Invalid cache option: %s", cache);
3789         goto out;
3790     }
3791 
3792     src_flags = 0;
3793     ret = bdrv_parse_cache_mode(src_cache, &src_flags, &src_writethrough);
3794     if (ret < 0) {
3795         error_report("Invalid source cache option: %s", src_cache);
3796         goto out;
3797     }
3798 
3799     /* The source files are opened read-only, don't care about WCE */
3800     assert((src_flags & BDRV_O_RDWR) == 0);
3801     (void) src_writethrough;
3802 
3803     /*
3804      * Open the images.
3805      *
3806      * Ignore the old backing file for unsafe rebase in case we want to correct
3807      * the reference to a renamed or moved backing file.
3808      */
3809     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
3810                    false);
3811     if (!blk) {
3812         ret = -1;
3813         goto out;
3814     }
3815     bs = blk_bs(blk);
3816 
3817     bdrv_graph_rdlock_main_loop();
3818     unfiltered_bs = bdrv_skip_filters(bs);
3819     unfiltered_bs_cow = bdrv_cow_bs(unfiltered_bs);
3820     bdrv_graph_rdunlock_main_loop();
3821 
3822     if (compress && !block_driver_can_compress(unfiltered_bs->drv)) {
3823         error_report("Compression not supported for this file format");
3824         ret = -1;
3825         goto out;
3826     } else if (compress) {
3827         write_flags |= BDRV_REQ_WRITE_COMPRESSED;
3828     }
3829 
3830     if (out_basefmt != NULL) {
3831         if (bdrv_find_format(out_basefmt) == NULL) {
3832             error_report("Invalid format name: '%s'", out_basefmt);
3833             ret = -1;
3834             goto out;
3835         }
3836     }
3837 
3838     /*
3839      * We need overlay subcluster size (or cluster size in case writes are
3840      * compressed) to make sure write requests are aligned.
3841      */
3842     ret = bdrv_get_info(unfiltered_bs, &bdi);
3843     if (ret < 0) {
3844         error_report("could not get block driver info");
3845         goto out;
3846     } else if (bdi.subcluster_size == 0) {
3847         bdi.cluster_size = bdi.subcluster_size = 1;
3848     }
3849 
3850     write_align = compress ? bdi.cluster_size : bdi.subcluster_size;
3851 
3852     /* For safe rebasing we need to compare old and new backing file */
3853     if (!unsafe) {
3854         QDict *options = NULL;
3855         BlockDriverState *base_bs;
3856 
3857         bdrv_graph_rdlock_main_loop();
3858         base_bs = bdrv_cow_bs(unfiltered_bs);
3859         bdrv_graph_rdunlock_main_loop();
3860 
3861         if (base_bs) {
3862             blk_old_backing = blk_new(qemu_get_aio_context(),
3863                                       BLK_PERM_CONSISTENT_READ,
3864                                       BLK_PERM_ALL);
3865             ret = blk_insert_bs(blk_old_backing, base_bs,
3866                                 &local_err);
3867             if (ret < 0) {
3868                 error_reportf_err(local_err,
3869                                   "Could not reuse old backing file '%s': ",
3870                                   base_bs->filename);
3871                 goto out;
3872             }
3873         } else {
3874             blk_old_backing = NULL;
3875         }
3876 
3877         if (out_baseimg[0]) {
3878             const char *overlay_filename;
3879             char *out_real_path;
3880 
3881             options = qdict_new();
3882             if (out_basefmt) {
3883                 qdict_put_str(options, "driver", out_basefmt);
3884             }
3885             if (force_share) {
3886                 qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, true);
3887             }
3888 
3889             bdrv_graph_rdlock_main_loop();
3890             bdrv_refresh_filename(bs);
3891             bdrv_graph_rdunlock_main_loop();
3892             overlay_filename = bs->exact_filename[0] ? bs->exact_filename
3893                                                      : bs->filename;
3894             out_real_path =
3895                 bdrv_get_full_backing_filename_from_filename(overlay_filename,
3896                                                              out_baseimg,
3897                                                              &local_err);
3898             if (local_err) {
3899                 qobject_unref(options);
3900                 error_reportf_err(local_err,
3901                                   "Could not resolve backing filename: ");
3902                 ret = -1;
3903                 goto out;
3904             }
3905 
3906             /*
3907              * Find out whether we rebase an image on top of a previous image
3908              * in its chain.
3909              */
3910             prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path);
3911             if (prefix_chain_bs) {
3912                 qobject_unref(options);
3913                 g_free(out_real_path);
3914 
3915                 blk_new_backing = blk_new(qemu_get_aio_context(),
3916                                           BLK_PERM_CONSISTENT_READ,
3917                                           BLK_PERM_ALL);
3918                 ret = blk_insert_bs(blk_new_backing, prefix_chain_bs,
3919                                     &local_err);
3920                 if (ret < 0) {
3921                     error_reportf_err(local_err,
3922                                       "Could not reuse backing file '%s': ",
3923                                       out_baseimg);
3924                     goto out;
3925                 }
3926             } else {
3927                 blk_new_backing = blk_new_open(out_real_path, NULL,
3928                                                options, src_flags, &local_err);
3929                 g_free(out_real_path);
3930                 if (!blk_new_backing) {
3931                     error_reportf_err(local_err,
3932                                       "Could not open new backing file '%s': ",
3933                                       out_baseimg);
3934                     ret = -1;
3935                     goto out;
3936                 }
3937             }
3938         }
3939     }
3940 
3941     /*
3942      * Check each unallocated cluster in the COW file. If it is unallocated,
3943      * accesses go to the backing file. We must therefore compare this cluster
3944      * in the old and new backing file, and if they differ we need to copy it
3945      * from the old backing file into the COW file.
3946      *
3947      * If qemu-img crashes during this step, no harm is done. The content of
3948      * the image is the same as the original one at any time.
3949      */
3950     if (!unsafe) {
3951         int64_t size;
3952         int64_t old_backing_size = 0;
3953         int64_t new_backing_size = 0;
3954         uint64_t offset;
3955         int64_t n, n_old = 0, n_new = 0;
3956         float local_progress = 0;
3957 
3958         if (blk_old_backing && bdrv_opt_mem_align(blk_bs(blk_old_backing)) >
3959             bdrv_opt_mem_align(blk_bs(blk))) {
3960             buf_old = blk_blockalign(blk_old_backing, IO_BUF_SIZE);
3961         } else {
3962             buf_old = blk_blockalign(blk, IO_BUF_SIZE);
3963         }
3964         buf_new = blk_blockalign(blk_new_backing, IO_BUF_SIZE);
3965 
3966         size = blk_getlength(blk);
3967         if (size < 0) {
3968             error_report("Could not get size of '%s': %s",
3969                          filename, strerror(-size));
3970             ret = -1;
3971             goto out;
3972         }
3973         if (blk_old_backing) {
3974             old_backing_size = blk_getlength(blk_old_backing);
3975             if (old_backing_size < 0) {
3976                 char backing_name[PATH_MAX];
3977 
3978                 bdrv_get_backing_filename(bs, backing_name,
3979                                           sizeof(backing_name));
3980                 error_report("Could not get size of '%s': %s",
3981                              backing_name, strerror(-old_backing_size));
3982                 ret = -1;
3983                 goto out;
3984             }
3985         }
3986         if (blk_new_backing) {
3987             new_backing_size = blk_getlength(blk_new_backing);
3988             if (new_backing_size < 0) {
3989                 error_report("Could not get size of '%s': %s",
3990                              out_baseimg, strerror(-new_backing_size));
3991                 ret = -1;
3992                 goto out;
3993             }
3994         }
3995 
3996         if (size != 0) {
3997             local_progress = (float)100 / (size / MIN(size, IO_BUF_SIZE));
3998         }
3999 
4000         for (offset = 0; offset < size; offset += n) {
4001             bool old_backing_eof = false;
4002             int64_t n_alloc;
4003 
4004             /* How many bytes can we handle with the next read? */
4005             n = MIN(IO_BUF_SIZE, size - offset);
4006 
4007             /* If the cluster is allocated, we don't need to take action */
4008             ret = bdrv_is_allocated(unfiltered_bs, offset, n, &n);
4009             if (ret < 0) {
4010                 error_report("error while reading image metadata: %s",
4011                              strerror(-ret));
4012                 goto out;
4013             }
4014             if (ret) {
4015                 continue;
4016             }
4017 
4018             if (prefix_chain_bs) {
4019                 uint64_t bytes = n;
4020 
4021                 /*
4022                  * If cluster wasn't changed since prefix_chain, we don't need
4023                  * to take action
4024                  */
4025                 ret = bdrv_is_allocated_above(unfiltered_bs_cow,
4026                                               prefix_chain_bs, false,
4027                                               offset, n, &n);
4028                 if (ret < 0) {
4029                     error_report("error while reading image metadata: %s",
4030                                  strerror(-ret));
4031                     goto out;
4032                 }
4033                 if (!ret && n) {
4034                     continue;
4035                 }
4036                 if (!n) {
4037                     /*
4038                      * If we've reached EOF of the old backing, it means that
4039                      * offsets beyond the old backing size were read as zeroes.
4040                      * Now we will need to explicitly zero the cluster in
4041                      * order to preserve that state after the rebase.
4042                      */
4043                     n = bytes;
4044                 }
4045             }
4046 
4047             /*
4048              * At this point we know that the region [offset; offset + n)
4049              * is unallocated within the target image.  This region might be
4050              * unaligned to the target image's (sub)cluster boundaries, as
4051              * old backing may have smaller clusters (or have subclusters).
4052              * We extend it to the aligned boundaries to avoid CoW on
4053              * partial writes in blk_pwrite(),
4054              */
4055             n += offset - QEMU_ALIGN_DOWN(offset, write_align);
4056             offset = QEMU_ALIGN_DOWN(offset, write_align);
4057             n += QEMU_ALIGN_UP(offset + n, write_align) - (offset + n);
4058             n = MIN(n, size - offset);
4059             assert(!bdrv_is_allocated(unfiltered_bs, offset, n, &n_alloc) &&
4060                    n_alloc == n);
4061 
4062             /*
4063              * Much like with the target image, we'll try to read as much
4064              * of the old and new backings as we can.
4065              */
4066             n_old = MIN(n, MAX(0, old_backing_size - (int64_t) offset));
4067             n_new = MIN(n, MAX(0, new_backing_size - (int64_t) offset));
4068 
4069             /*
4070              * Read old and new backing file and take into consideration that
4071              * backing files may be smaller than the COW image.
4072              */
4073             memset(buf_old + n_old, 0, n - n_old);
4074             if (!n_old) {
4075                 old_backing_eof = true;
4076             } else {
4077                 ret = blk_pread(blk_old_backing, offset, n_old, buf_old, 0);
4078                 if (ret < 0) {
4079                     error_report("error while reading from old backing file");
4080                     goto out;
4081                 }
4082             }
4083 
4084             memset(buf_new + n_new, 0, n - n_new);
4085             if (n_new) {
4086                 ret = blk_pread(blk_new_backing, offset, n_new, buf_new, 0);
4087                 if (ret < 0) {
4088                     error_report("error while reading from new backing file");
4089                     goto out;
4090                 }
4091             }
4092 
4093             /* If they differ, we need to write to the COW file */
4094             uint64_t written = 0;
4095 
4096             while (written < n) {
4097                 int64_t pnum;
4098 
4099                 if (compare_buffers(buf_old + written, buf_new + written,
4100                                     n - written, write_align, &pnum))
4101                 {
4102                     if (old_backing_eof) {
4103                         ret = blk_pwrite_zeroes(blk, offset + written, pnum, 0);
4104                     } else {
4105                         assert(written + pnum <= IO_BUF_SIZE);
4106                         ret = blk_pwrite(blk, offset + written, pnum,
4107                                          buf_old + written, write_flags);
4108                     }
4109                     if (ret < 0) {
4110                         error_report("Error while writing to COW image: %s",
4111                             strerror(-ret));
4112                         goto out;
4113                     }
4114                 }
4115 
4116                 written += pnum;
4117                 if (offset + written >= old_backing_size) {
4118                     old_backing_eof = true;
4119                 }
4120             }
4121             qemu_progress_print(local_progress, 100);
4122         }
4123     }
4124 
4125     /*
4126      * Change the backing file. All clusters that are different from the old
4127      * backing file are overwritten in the COW file now, so the visible content
4128      * doesn't change when we switch the backing file.
4129      */
4130     if (out_baseimg && *out_baseimg) {
4131         ret = bdrv_change_backing_file(unfiltered_bs, out_baseimg, out_basefmt,
4132                                        true);
4133     } else {
4134         ret = bdrv_change_backing_file(unfiltered_bs, NULL, NULL, false);
4135     }
4136 
4137     if (ret == -ENOSPC) {
4138         error_report("Could not change the backing file to '%s': No "
4139                      "space left in the file header", out_baseimg);
4140     } else if (ret == -EINVAL && out_baseimg && !out_basefmt) {
4141         error_report("Could not change the backing file to '%s': backing "
4142                      "format must be specified", out_baseimg);
4143     } else if (ret < 0) {
4144         error_report("Could not change the backing file to '%s': %s",
4145             out_baseimg, strerror(-ret));
4146     }
4147 
4148     qemu_progress_print(100, 0);
4149     /*
4150      * TODO At this point it is possible to check if any clusters that are
4151      * allocated in the COW file are the same in the backing file. If so, they
4152      * could be dropped from the COW file. Don't do this before switching the
4153      * backing file, in case of a crash this would lead to corruption.
4154      */
4155 out:
4156     qemu_progress_end();
4157     /* Cleanup */
4158     if (!unsafe) {
4159         blk_unref(blk_old_backing);
4160         blk_unref(blk_new_backing);
4161     }
4162     qemu_vfree(buf_old);
4163     qemu_vfree(buf_new);
4164 
4165     blk_unref(blk);
4166     if (ret) {
4167         return 1;
4168     }
4169     return 0;
4170 }
4171 
img_resize(const img_cmd_t * ccmd,int argc,char ** argv)4172 static int img_resize(const img_cmd_t *ccmd, int argc, char **argv)
4173 {
4174     Error *err = NULL;
4175     int c, ret, relative;
4176     const char *filename = NULL, *fmt = NULL, *size = NULL;
4177     int64_t n, total_size, current_size;
4178     bool quiet = false;
4179     BlockBackend *blk = NULL;
4180     PreallocMode prealloc = PREALLOC_MODE_OFF;
4181     QemuOpts *param;
4182 
4183     static QemuOptsList resize_options = {
4184         .name = "resize_options",
4185         .head = QTAILQ_HEAD_INITIALIZER(resize_options.head),
4186         .desc = {
4187             {
4188                 .name = BLOCK_OPT_SIZE,
4189                 .type = QEMU_OPT_SIZE,
4190                 .help = "Virtual disk size"
4191             }, {
4192                 /* end of list */
4193             }
4194         },
4195     };
4196     bool image_opts = false;
4197     bool shrink = false;
4198 
4199     /* Parse getopt arguments */
4200     for(;;) {
4201         static const struct option long_options[] = {
4202             {"help", no_argument, 0, 'h'},
4203             {"format", required_argument, 0, 'f'},
4204             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4205             {"preallocation", required_argument, 0, OPTION_PREALLOCATION},
4206             {"shrink", no_argument, 0, OPTION_SHRINK},
4207             {"quiet", no_argument, 0, 'q'},
4208             {"object", required_argument, 0, OPTION_OBJECT},
4209             {0, 0, 0, 0}
4210         };
4211         c = getopt_long(argc, argv, "-hf:q",
4212                         long_options, NULL);
4213         if (c == -1) {
4214             break;
4215         }
4216         switch(c) {
4217         case 'h':
4218             cmd_help(ccmd, "[-f FMT | --image-opts] [--preallocation PREALLOC] [--shrink]\n"
4219 "        [-q] [--object OBJDEF] FILE [+-]SIZE[bkKMGTPE]\n"
4220 ,
4221 "  -f, --format FMT\n"
4222 "     specify FILE format explicitly (default: probing is used)\n"
4223 "  --image-opts\n"
4224 "     treat FILE as an option string (key=value,...), not a file name\n"
4225 "     (incompatible with -f|--format)\n"
4226 "  --shrink\n"
4227 "     allow operation when the new size is smaller than the original\n"
4228 "  --preallocation PREALLOC\n"
4229 "     specify FMT-specific preallocation type for the new areas\n"
4230 "  -q, --quiet\n"
4231 "     quiet mode (produce only error messages if any)\n"
4232 "  --object OBJDEF\n"
4233 "     defines QEMU user-creatable object\n"
4234 "  FILE\n"
4235 "     name of the image file, or option string (key=value,..)\n"
4236 "     with --image-opts, to operate on\n"
4237 "  [+-]SIZE[bkKMGTPE]\n"
4238 "     new image size or amount by which to shrink (-)/grow (+),\n"
4239 "     with optional multiplier suffix (powers of 1024, default is bytes)\n"
4240 );
4241             return 0;
4242         case 'f':
4243             fmt = optarg;
4244             break;
4245         case OPTION_IMAGE_OPTS:
4246             image_opts = true;
4247             break;
4248         case OPTION_PREALLOCATION:
4249             prealloc = qapi_enum_parse(&PreallocMode_lookup, optarg,
4250                                        PREALLOC_MODE__MAX, NULL);
4251             if (prealloc == PREALLOC_MODE__MAX) {
4252                 error_report("Invalid preallocation mode '%s'", optarg);
4253                 return 1;
4254             }
4255             break;
4256         case OPTION_SHRINK:
4257             shrink = true;
4258             break;
4259         case 'q':
4260             quiet = true;
4261             break;
4262         case OPTION_OBJECT:
4263             user_creatable_process_cmdline(optarg);
4264             break;
4265         case 1: /* a non-optional argument */
4266             if (!filename) {
4267                 filename = optarg;
4268                 /* see if we have -size (number) next to filename */
4269                 if (optind < argc) {
4270                     size = argv[optind];
4271                     if (size[0] == '-' && size[1] >= '0' && size[1] <= '9') {
4272                         ++optind;
4273                     } else {
4274                         size = NULL;
4275                     }
4276                 }
4277             } else if (!size) {
4278                 size = optarg;
4279             } else {
4280                 error_exit(argv[0], "Extra argument(s) in command line");
4281             }
4282             break;
4283         default:
4284             tryhelp(argv[0]);
4285         }
4286     }
4287     if (!filename && optind < argc) {
4288         filename = argv[optind++];
4289     }
4290     if (!size && optind < argc) {
4291         size = argv[optind++];
4292     }
4293     if (!filename || !size || optind < argc) {
4294         error_exit(argv[0], "Expecting image file name and size");
4295     }
4296 
4297     /* Choose grow, shrink, or absolute resize mode */
4298     switch (size[0]) {
4299     case '+':
4300         relative = 1;
4301         size++;
4302         break;
4303     case '-':
4304         relative = -1;
4305         size++;
4306         break;
4307     default:
4308         relative = 0;
4309         break;
4310     }
4311 
4312     /* Parse size */
4313     param = qemu_opts_create(&resize_options, NULL, 0, &error_abort);
4314     if (!qemu_opt_set(param, BLOCK_OPT_SIZE, size, &err)) {
4315         error_report_err(err);
4316         ret = -1;
4317         qemu_opts_del(param);
4318         goto out;
4319     }
4320     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
4321     qemu_opts_del(param);
4322 
4323     blk = img_open(image_opts, filename, fmt,
4324                    BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet,
4325                    false);
4326     if (!blk) {
4327         ret = -1;
4328         goto out;
4329     }
4330 
4331     current_size = blk_getlength(blk);
4332     if (current_size < 0) {
4333         error_report("Failed to inquire current image length: %s",
4334                      strerror(-current_size));
4335         ret = -1;
4336         goto out;
4337     }
4338 
4339     if (relative) {
4340         total_size = current_size + n * relative;
4341     } else {
4342         total_size = n;
4343     }
4344     if (total_size <= 0) {
4345         error_report("New image size must be positive");
4346         ret = -1;
4347         goto out;
4348     }
4349 
4350     if (total_size <= current_size && prealloc != PREALLOC_MODE_OFF) {
4351         error_report("Preallocation can only be used for growing images");
4352         ret = -1;
4353         goto out;
4354     }
4355 
4356     if (total_size < current_size && !shrink) {
4357         error_report("Use the --shrink option to perform a shrink operation.");
4358         warn_report("Shrinking an image will delete all data beyond the "
4359                     "shrunken image's end. Before performing such an "
4360                     "operation, make sure there is no important data there.");
4361         ret = -1;
4362         goto out;
4363     }
4364 
4365     /*
4366      * The user expects the image to have the desired size after
4367      * resizing, so pass @exact=true.  It is of no use to report
4368      * success when the image has not actually been resized.
4369      */
4370     ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
4371     if (!ret) {
4372         qprintf(quiet, "Image resized.\n");
4373     } else {
4374         error_report_err(err);
4375     }
4376 out:
4377     blk_unref(blk);
4378     if (ret) {
4379         return 1;
4380     }
4381     return 0;
4382 }
4383 
/*
 * Progress callback passed to bdrv_amend_options() by img_amend().
 *
 * Converts @offset out of @total_work_size into a percentage and hands it
 * to qemu_progress_print().  @bs and @opaque are not used.
 */
static void amend_status_cb(BlockDriverState *bs,
                            int64_t offset, int64_t total_work_size,
                            void *opaque)
{
    float percent = 100.f * offset / total_work_size;

    qemu_progress_print(percent, 0);
}
4390 
print_amend_option_help(const char * format)4391 static int print_amend_option_help(const char *format)
4392 {
4393     BlockDriver *drv;
4394 
4395     GRAPH_RDLOCK_GUARD_MAINLOOP();
4396 
4397     /* Find driver and parse its options */
4398     drv = bdrv_find_format(format);
4399     if (!drv) {
4400         error_report("Unknown file format '%s'", format);
4401         return 1;
4402     }
4403 
4404     if (!drv->bdrv_amend_options) {
4405         error_report("Format driver '%s' does not support option amendment",
4406                      format);
4407         return 1;
4408     }
4409 
4410     /* Every driver supporting amendment must have amend_opts */
4411     assert(drv->amend_opts);
4412 
4413     printf("Amend options for '%s':\n", format);
4414     qemu_opts_print_help(drv->amend_opts, false);
4415     return 0;
4416 }
4417 
img_amend(const img_cmd_t * ccmd,int argc,char ** argv)4418 static int img_amend(const img_cmd_t *ccmd, int argc, char **argv)
4419 {
4420     Error *err = NULL;
4421     int c, ret = 0;
4422     char *options = NULL;
4423     QemuOptsList *amend_opts = NULL;
4424     QemuOpts *opts = NULL;
4425     const char *fmt = NULL, *filename, *cache;
4426     int flags;
4427     bool writethrough;
4428     bool quiet = false, progress = false;
4429     BlockBackend *blk = NULL;
4430     BlockDriverState *bs = NULL;
4431     bool image_opts = false;
4432     bool force = false;
4433 
4434     cache = BDRV_DEFAULT_CACHE;
4435     for (;;) {
4436         static const struct option long_options[] = {
4437             {"help", no_argument, 0, 'h'},
4438             {"options", required_argument, 0, 'o'},
4439             {"format", required_argument, 0, 'f'},
4440             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4441             {"cache", required_argument, 0, 't'},
4442             {"force", no_argument, 0, OPTION_FORCE},
4443             {"progress", no_argument, 0, 'p'},
4444             {"quiet", no_argument, 0, 'q'},
4445             {"object", required_argument, 0, OPTION_OBJECT},
4446             {0, 0, 0, 0}
4447         };
4448         c = getopt_long(argc, argv, "ho:f:t:pq",
4449                         long_options, NULL);
4450         if (c == -1) {
4451             break;
4452         }
4453 
4454         switch (c) {
4455         case 'h':
4456             cmd_help(ccmd, "-o FMT_OPTS [-f FMT | --image-opts]\n"
4457 "        [-t CACHE] [--force] [-p] [-q] [--object OBJDEF] FILE\n"
4458 ,
4459 "  -o, --options FMT_OPTS\n"
4460 "     FMT-specfic format options (required)\n"
4461 "  -f, --format FMT\n"
4462 "     specify FILE format explicitly (default: probing is used)\n"
4463 "  --image-opts\n"
4464 "     treat FILE as an option string (key=value,..), not a file name\n"
4465 "     (incompatible with -f|--format)\n"
4466 "  -t, --cache CACHE\n"
4467 "     cache mode for FILE (default: " BDRV_DEFAULT_CACHE ")\n"
4468 "  --force\n"
4469 "     allow certain unsafe operations\n"
4470 "  -p, --progres\n"
4471 "     show operation progress\n"
4472 "  -q, --quiet\n"
4473 "     quiet mode (produce only error messages if any)\n"
4474 "  --object OBJDEF\n"
4475 "     defines QEMU user-creatable object\n"
4476 "  FILE\n"
4477 "     name of the image file, or option string (key=value,..)\n"
4478 "     with --image-opts, to operate on\n"
4479 );
4480             break;
4481         case 'o':
4482             if (accumulate_options(&options, optarg) < 0) {
4483                 ret = -1;
4484                 goto out_no_progress;
4485             }
4486             break;
4487         case 'f':
4488             fmt = optarg;
4489             break;
4490         case OPTION_IMAGE_OPTS:
4491             image_opts = true;
4492             break;
4493         case 't':
4494             cache = optarg;
4495             break;
4496         case OPTION_FORCE:
4497             force = true;
4498             break;
4499         case 'p':
4500             progress = true;
4501             break;
4502         case 'q':
4503             quiet = true;
4504             break;
4505         case OPTION_OBJECT:
4506             user_creatable_process_cmdline(optarg);
4507             break;
4508         default:
4509             tryhelp(argv[0]);
4510         }
4511     }
4512 
4513     if (!options) {
4514         error_exit(argv[0], "Must specify options (-o)");
4515     }
4516 
4517     if (quiet) {
4518         progress = false;
4519     }
4520     qemu_progress_init(progress, 1.0);
4521 
4522     filename = (optind == argc - 1) ? argv[argc - 1] : NULL;
4523     if (fmt && has_help_option(options)) {
4524         /* If a format is explicitly specified (and possibly no filename is
4525          * given), print option help here */
4526         ret = print_amend_option_help(fmt);
4527         goto out;
4528     }
4529 
4530     if (optind != argc - 1) {
4531         error_report("Expecting one image file name");
4532         ret = -1;
4533         goto out;
4534     }
4535 
4536     flags = BDRV_O_RDWR;
4537     ret = bdrv_parse_cache_mode(cache, &flags, &writethrough);
4538     if (ret < 0) {
4539         error_report("Invalid cache option: %s", cache);
4540         goto out;
4541     }
4542 
4543     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4544                    false);
4545     if (!blk) {
4546         ret = -1;
4547         goto out;
4548     }
4549     bs = blk_bs(blk);
4550 
4551     fmt = bs->drv->format_name;
4552 
4553     if (has_help_option(options)) {
4554         /* If the format was auto-detected, print option help here */
4555         ret = print_amend_option_help(fmt);
4556         goto out;
4557     }
4558 
4559     bdrv_graph_rdlock_main_loop();
4560     if (!bs->drv->bdrv_amend_options) {
4561         error_report("Format driver '%s' does not support option amendment",
4562                      fmt);
4563         bdrv_graph_rdunlock_main_loop();
4564         ret = -1;
4565         goto out;
4566     }
4567 
4568     /* Every driver supporting amendment must have amend_opts */
4569     assert(bs->drv->amend_opts);
4570 
4571     amend_opts = qemu_opts_append(amend_opts, bs->drv->amend_opts);
4572     opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4573     if (!qemu_opts_do_parse(opts, options, NULL, &err)) {
4574         /* Try to parse options using the create options */
4575         amend_opts = qemu_opts_append(amend_opts, bs->drv->create_opts);
4576         qemu_opts_del(opts);
4577         opts = qemu_opts_create(amend_opts, NULL, 0, &error_abort);
4578         if (qemu_opts_do_parse(opts, options, NULL, NULL)) {
4579             error_append_hint(&err,
4580                               "This option is only supported for image creation\n");
4581         }
4582 
4583         bdrv_graph_rdunlock_main_loop();
4584         error_report_err(err);
4585         ret = -1;
4586         goto out;
4587     }
4588 
4589     /* In case the driver does not call amend_status_cb() */
4590     qemu_progress_print(0.f, 0);
4591     ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, force, &err);
4592     qemu_progress_print(100.f, 0);
4593     bdrv_graph_rdunlock_main_loop();
4594 
4595     if (ret < 0) {
4596         error_report_err(err);
4597         goto out;
4598     }
4599 
4600 out:
4601     qemu_progress_end();
4602 
4603 out_no_progress:
4604     blk_unref(blk);
4605     qemu_opts_del(opts);
4606     qemu_opts_free(amend_opts);
4607     g_free(options);
4608 
4609     if (ret) {
4610         return 1;
4611     }
4612     return 0;
4613 }
4614 
4615 typedef struct BenchData {
4616     BlockBackend *blk;
4617     uint64_t image_size;
4618     bool write;
4619     int bufsize;
4620     int step;
4621     int nrreq;
4622     int n;
4623     int flush_interval;
4624     bool drain_on_flush;
4625     uint8_t *buf;
4626     QEMUIOVector *qiov;
4627 
4628     int in_flight;
4629     bool in_flush;
4630     uint64_t offset;
4631 } BenchData;
4632 
/*
 * Completion callback for flushes issued without draining the queue
 * (see bench_cb()): a failed flush terminates the program, a successful
 * one needs no further action.  @opaque is not used.
 */
static void bench_undrained_flush_cb(void *opaque, int ret)
{
    if (ret >= 0) {
        return;
    }

    error_report("Failed flush request: %s", strerror(-ret));
    exit(EXIT_FAILURE);
}
4640 
bench_cb(void * opaque,int ret)4641 static void bench_cb(void *opaque, int ret)
4642 {
4643     BenchData *b = opaque;
4644     BlockAIOCB *acb;
4645 
4646     if (ret < 0) {
4647         error_report("Failed request: %s", strerror(-ret));
4648         exit(EXIT_FAILURE);
4649     }
4650 
4651     if (b->in_flush) {
4652         /* Just finished a flush with drained queue: Start next requests */
4653         assert(b->in_flight == 0);
4654         b->in_flush = false;
4655     } else if (b->in_flight > 0) {
4656         int remaining = b->n - b->in_flight;
4657 
4658         b->n--;
4659         b->in_flight--;
4660 
4661         /* Time for flush? Drain queue if requested, then flush */
4662         if (b->flush_interval && remaining % b->flush_interval == 0) {
4663             if (!b->in_flight || !b->drain_on_flush) {
4664                 BlockCompletionFunc *cb;
4665 
4666                 if (b->drain_on_flush) {
4667                     b->in_flush = true;
4668                     cb = bench_cb;
4669                 } else {
4670                     cb = bench_undrained_flush_cb;
4671                 }
4672 
4673                 acb = blk_aio_flush(b->blk, cb, b);
4674                 if (!acb) {
4675                     error_report("Failed to issue flush request");
4676                     exit(EXIT_FAILURE);
4677                 }
4678             }
4679             if (b->drain_on_flush) {
4680                 return;
4681             }
4682         }
4683     }
4684 
4685     while (b->n > b->in_flight && b->in_flight < b->nrreq) {
4686         int64_t offset = b->offset;
4687         /* blk_aio_* might look for completed I/Os and kick bench_cb
4688          * again, so make sure this operation is counted by in_flight
4689          * and b->offset is ready for the next submission.
4690          */
4691         b->in_flight++;
4692         b->offset += b->step;
4693         if (b->image_size <= b->bufsize) {
4694             b->offset = 0;
4695         } else {
4696             b->offset %= b->image_size - b->bufsize;
4697         }
4698         if (b->write) {
4699             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
4700         } else {
4701             acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b);
4702         }
4703         if (!acb) {
4704             error_report("Failed to issue request");
4705             exit(EXIT_FAILURE);
4706         }
4707     }
4708 }
4709 
img_bench(const img_cmd_t * ccmd,int argc,char ** argv)4710 static int img_bench(const img_cmd_t *ccmd, int argc, char **argv)
4711 {
4712     int c, ret = 0;
4713     const char *fmt = NULL, *filename;
4714     bool quiet = false;
4715     bool image_opts = false;
4716     bool is_write = false;
4717     int count = 75000;
4718     int depth = 64;
4719     int64_t offset = 0;
4720     ssize_t bufsize = 4096;
4721     int pattern = 0;
4722     ssize_t step = 0;
4723     int flush_interval = 0;
4724     bool drain_on_flush = true;
4725     int64_t image_size;
4726     BlockBackend *blk = NULL;
4727     BenchData data = {};
4728     int flags = 0;
4729     bool writethrough = false;
4730     struct timeval t1, t2;
4731     int i;
4732     bool force_share = false;
4733     size_t buf_size = 0;
4734 
4735     for (;;) {
4736         static const struct option long_options[] = {
4737             {"help", no_argument, 0, 'h'},
4738             {"format", required_argument, 0, 'f'},
4739             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
4740             {"cache", required_argument, 0, 't'},
4741             {"count", required_argument, 0, 'c'},
4742             {"depth", required_argument, 0, 'd'},
4743             {"offset", required_argument, 0, 'o'},
4744             {"buffer-size", required_argument, 0, 's'},
4745             {"step-size", required_argument, 0, 'S'},
4746             {"write", no_argument, 0, 'w'},
4747             {"pattern", required_argument, 0, OPTION_PATTERN},
4748             {"flush-interval", required_argument, 0, OPTION_FLUSH_INTERVAL},
4749             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
4750             {"aio", required_argument, 0, 'i'},
4751             {"native", no_argument, 0, 'n'},
4752             {"force-share", no_argument, 0, 'U'},
4753             {"quiet", no_argument, 0, 'q'},
4754             {"object", required_argument, 0, OPTION_OBJECT},
4755             {0, 0, 0, 0}
4756         };
4757         c = getopt_long(argc, argv, "hf:t:c:d:o:s:S:wi:nUq",
4758                         long_options, NULL);
4759         if (c == -1) {
4760             break;
4761         }
4762 
4763         switch (c) {
4764         case 'h':
4765             cmd_help(ccmd, "[-f FMT | --image-opts] [-t CACHE]\n"
4766 "        [-c COUNT] [-d DEPTH] [-o OFFSET] [-s BUFFER_SIZE] [-S STEP_SIZE]\n"
4767 "        [-w [--pattern PATTERN] [--flush-interval INTERVAL [--no-drain]]]\n"
4768 "        [-i AIO] [-n] [-U] [-q] FILE\n"
4769 ,
4770 "  -f, --format FMT\n"
4771 "     specify FILE format explicitly\n"
4772 "  --image-opts\n"
4773 "     indicates that FILE is a complete image specification\n"
4774 "     instead of a file name (incompatible with --format)\n"
4775 "  -t, --cache CACHE\n"
4776 "     cache mode for FILE (default: " BDRV_DEFAULT_CACHE ")\n"
4777 "  -c, --count COUNT\n"
4778 "     number of I/O requests to perform\n"
4779 "  -d, --depth DEPTH\n"
4780 "     number of requests to perform in parallel\n"
4781 "  -o, --offset OFFSET\n"
4782 "     start first request at this OFFSET\n"
4783 "  -s, --buffer-size BUFFER_SIZE[bkKMGTPE]\n"
4784 "     size of each I/O request, with optional multiplier suffix\n"
4785 "     (powers of 1024, default is 4K)\n"
4786 "  -S, --step-size STEP_SIZE[bkKMGTPE]\n"
4787 "     each next request offset increment, with optional multiplier suffix\n"
4788 "     (powers of 1024, default is the same as BUFFER_SIZE)\n"
4789 "  -w, --write\n"
4790 "     perform write test (default is read)\n"
4791 "  --pattern PATTERN\n"
4792 "     write this pattern byte instead of zero\n"
4793 "  --flush-interval FLUSH_INTERVAL\n"
4794 "     issue flush after this number of requests\n"
4795 "  --no-drain\n"
4796 "     do not wait when flushing pending requests\n"
4797 "  -i, --aio AIO\n"
4798 "     async-io backend (threads, native, io_uring)\n"
4799 "  -n, --native\n"
4800 "     use native AIO backend if possible\n"
4801 "  -U, --force-share\n"
4802 "     open images in shared mode for concurrent access\n"
4803 "  -q, --quiet\n"
4804 "     quiet mode (produce only error messages if any)\n"
4805 "  --object OBJDEF\n"
4806 "     defines QEMU user-creatable object\n"
4807 "  FILE\n"
4808 "     name of the image file, or option string (key=value,..)\n"
4809 "     with --image-opts, to operate on\n"
4810 );
4811             break;
4812         case 'f':
4813             fmt = optarg;
4814             break;
4815         case OPTION_IMAGE_OPTS:
4816             image_opts = true;
4817             break;
4818         case 't':
4819             ret = bdrv_parse_cache_mode(optarg, &flags, &writethrough);
4820             if (ret < 0) {
4821                 error_report("Invalid cache mode");
4822                 ret = -1;
4823                 goto out;
4824             }
4825             break;
4826         case 'c':
4827             count = cvtnum_full("request count", optarg, false, 1, INT_MAX);
4828             if (count < 0) {
4829                 return 1;
4830             }
4831             break;
4832         case 'd':
4833             depth = cvtnum_full("queue depth", optarg, false, 1, INT_MAX);
4834             if (depth < 0) {
4835                 return 1;
4836             }
4837             break;
4838         case 'n':
4839             flags |= BDRV_O_NATIVE_AIO;
4840             break;
4841         case 'i':
4842             ret = bdrv_parse_aio(optarg, &flags);
4843             if (ret < 0) {
4844                 error_report("Invalid aio option: %s", optarg);
4845                 ret = -1;
4846                 goto out;
4847             }
4848             break;
4849         case 'o':
4850             offset = cvtnum("offset", optarg, true);
4851             if (offset < 0) {
4852                 return 1;
4853             }
4854             break;
4855         case 's':
4856             bufsize = cvtnum_full("buffer size", optarg, true, 1, INT_MAX);
4857             if (bufsize < 0) {
4858                 return 1;
4859             }
4860             break;
4861         case 'S':
4862             step = cvtnum_full("step size", optarg, true, 0, INT_MAX);
4863             if (step < 0) {
4864                 return 1;
4865             }
4866             break;
4867         case 'w':
4868             flags |= BDRV_O_RDWR;
4869             is_write = true;
4870             break;
4871         case OPTION_PATTERN:
4872             pattern = cvtnum_full("pattern byte", optarg, false, 0, 0xff);
4873             if (pattern < 0) {
4874                 return 1;
4875             }
4876             break;
4877         case OPTION_FLUSH_INTERVAL:
4878             flush_interval = cvtnum_full("flush interval", optarg,
4879                                          false, 0, INT_MAX);
4880             if (flush_interval < 0) {
4881                 return 1;
4882             }
4883             break;
4884         case OPTION_NO_DRAIN:
4885             drain_on_flush = false;
4886             break;
4887         case 'U':
4888             force_share = true;
4889             break;
4890         case 'q':
4891             quiet = true;
4892             break;
4893         case OPTION_OBJECT:
4894             user_creatable_process_cmdline(optarg);
4895             break;
4896         default:
4897             tryhelp(argv[0]);
4898         }
4899     }
4900 
4901     if (optind != argc - 1) {
4902         error_exit(argv[0], "Expecting one image file name");
4903     }
4904     filename = argv[argc - 1];
4905 
4906     if (!is_write && flush_interval) {
4907         error_report("--flush-interval is only available in write tests");
4908         ret = -1;
4909         goto out;
4910     }
4911     if (flush_interval && flush_interval < depth) {
4912         error_report("Flush interval can't be smaller than depth");
4913         ret = -1;
4914         goto out;
4915     }
4916 
4917     blk = img_open(image_opts, filename, fmt, flags, writethrough, quiet,
4918                    force_share);
4919     if (!blk) {
4920         ret = -1;
4921         goto out;
4922     }
4923 
4924     image_size = blk_getlength(blk);
4925     if (image_size < 0) {
4926         ret = image_size;
4927         goto out;
4928     }
4929 
4930     data = (BenchData) {
4931         .blk            = blk,
4932         .image_size     = image_size,
4933         .bufsize        = bufsize,
4934         .step           = step ?: bufsize,
4935         .nrreq          = depth,
4936         .n              = count,
4937         .offset         = offset,
4938         .write          = is_write,
4939         .flush_interval = flush_interval,
4940         .drain_on_flush = drain_on_flush,
4941     };
4942     printf("Sending %d %s requests, %d bytes each, %d in parallel "
4943            "(starting at offset %" PRId64 ", step size %d)\n",
4944            data.n, data.write ? "write" : "read", data.bufsize, data.nrreq,
4945            data.offset, data.step);
4946     if (flush_interval) {
4947         printf("Sending flush every %d requests\n", flush_interval);
4948     }
4949 
4950     buf_size = data.nrreq * data.bufsize;
4951     data.buf = blk_blockalign(blk, buf_size);
4952     memset(data.buf, pattern, data.nrreq * data.bufsize);
4953 
4954     blk_register_buf(blk, data.buf, buf_size, &error_fatal);
4955 
4956     data.qiov = g_new(QEMUIOVector, data.nrreq);
4957     for (i = 0; i < data.nrreq; i++) {
4958         qemu_iovec_init(&data.qiov[i], 1);
4959         qemu_iovec_add(&data.qiov[i],
4960                        data.buf + i * data.bufsize, data.bufsize);
4961     }
4962 
4963     gettimeofday(&t1, NULL);
4964     bench_cb(&data, 0);
4965 
4966     while (data.n > 0) {
4967         main_loop_wait(false);
4968     }
4969     gettimeofday(&t2, NULL);
4970 
4971     printf("Run completed in %3.3f seconds.\n",
4972            (t2.tv_sec - t1.tv_sec)
4973            + ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
4974 
4975 out:
4976     if (data.buf) {
4977         blk_unregister_buf(blk, data.buf, buf_size);
4978     }
4979     qemu_vfree(data.buf);
4980     blk_unref(blk);
4981 
4982     if (ret) {
4983         return 1;
4984     }
4985     return 0;
4986 }
4987 
/* Operations that img_bitmap() can queue up, one per command line option */
enum ImgBitmapAct {
    BITMAP_ADD,         /* --add */
    BITMAP_REMOVE,      /* --remove */
    BITMAP_CLEAR,       /* --clear */
    BITMAP_ENABLE,      /* --enable */
    BITMAP_DISABLE,     /* --disable */
    BITMAP_MERGE,       /* --merge SOURCE */
};
4996 typedef struct ImgBitmapAction {
4997     enum ImgBitmapAct act;
4998     const char *src; /* only used for merge */
4999     QSIMPLEQ_ENTRY(ImgBitmapAction) next;
5000 } ImgBitmapAction;
5001 
img_bitmap(const img_cmd_t * ccmd,int argc,char ** argv)5002 static int img_bitmap(const img_cmd_t *ccmd, int argc, char **argv)
5003 {
5004     Error *err = NULL;
5005     int c, ret = 1;
5006     QemuOpts *opts = NULL;
5007     const char *fmt = NULL, *src_fmt = NULL, *src_filename = NULL;
5008     const char *filename, *bitmap;
5009     BlockBackend *blk = NULL, *src = NULL;
5010     BlockDriverState *bs = NULL, *src_bs = NULL;
5011     bool image_opts = false;
5012     int64_t granularity = 0;
5013     bool add = false, merge = false;
5014     QSIMPLEQ_HEAD(, ImgBitmapAction) actions;
5015     ImgBitmapAction *act, *act_next;
5016     const char *op;
5017     int inactivate_ret;
5018 
5019     QSIMPLEQ_INIT(&actions);
5020 
5021     for (;;) {
5022         static const struct option long_options[] = {
5023             {"help", no_argument, 0, 'h'},
5024             {"format", required_argument, 0, 'f'},
5025             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5026             {"add", no_argument, 0, OPTION_ADD},
5027             {"granularity", required_argument, 0, 'g'},
5028             {"remove", no_argument, 0, OPTION_REMOVE},
5029             {"clear", no_argument, 0, OPTION_CLEAR},
5030             {"enable", no_argument, 0, OPTION_ENABLE},
5031             {"disable", no_argument, 0, OPTION_DISABLE},
5032             {"merge", required_argument, 0, OPTION_MERGE},
5033             {"source-file", required_argument, 0, 'b'},
5034             {"source-format", required_argument, 0, 'F'},
5035             {"object", required_argument, 0, OPTION_OBJECT},
5036             {0, 0, 0, 0}
5037         };
5038         c = getopt_long(argc, argv, "hf:g:b:F:",
5039                         long_options, NULL);
5040         if (c == -1) {
5041             break;
5042         }
5043 
5044         switch (c) {
5045         case 'h':
5046             cmd_help(ccmd, "[-f FMT | --image-opts]\n"
5047 "        ( --add [-g SIZE] | --remove | --clear | --enable | --disable |\n"
5048 "          --merge SOURCE [-b SRC_FILE [-F SRC_FMT]] )..\n"
5049 "        [--object OBJDEF] FILE BITMAP\n"
5050 ,
5051 "  -f, --format FMT\n"
5052 "     specify FILE format explicitly (default: probing is used)\n"
5053 "  --image-opts\n"
5054 "     treat FILE as an option string (key=value,..), not a file name\n"
5055 "     (incompatible with -f|--format)\n"
5056 "  --add\n"
5057 "     creates BITMAP in FILE, enables to record future edits\n"
5058 "  -g, --granularity SIZE[bKMGTPE]\n"
5059 "     sets non-default granularity for the bitmap being added,\n"
5060 "     with optional multiplier suffix (in powers of 1024)\n"
5061 "  --remove\n"
5062 "     removes BITMAP from FILE\n"
5063 "  --clear\n"
5064 "     clears BITMAP in FILE\n"
5065 "  --enable, --disable\n"
5066 "     starts and stops recording future edits to BITMAP in FILE\n"
5067 "  --merge SOURCE\n"
5068 "     merges contents of the SOURCE bitmap into BITMAP in FILE\n"
5069 "  -b, --source-file SRC_FILE\n"
5070 "     select alternative source file for --merge\n"
5071 "  -F, --source-format SRC_FMT\n"
5072 "     specify format for SRC_FILE explicitly\n"
5073 "  --object OBJDEF\n"
5074 "     defines QEMU user-creatable object\n"
5075 "  FILE\n"
5076 "     name of the image file, or option string (key=value,..)\n"
5077 "     with --image-opts, to operate on\n"
5078 "  BITMAP\n"
5079 "     name of the bitmap to add, remove, clear, enable, disable or merge to\n"
5080 );
5081             break;
5082         case 'f':
5083             fmt = optarg;
5084             break;
5085         case OPTION_IMAGE_OPTS:
5086             image_opts = true;
5087             break;
5088         case OPTION_ADD:
5089             act = g_new0(ImgBitmapAction, 1);
5090             act->act = BITMAP_ADD;
5091             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5092             add = true;
5093             break;
5094         case 'g':
5095             granularity = cvtnum("granularity", optarg, true);
5096             if (granularity < 0) {
5097                 return 1;
5098             }
5099             break;
5100         case OPTION_REMOVE:
5101             act = g_new0(ImgBitmapAction, 1);
5102             act->act = BITMAP_REMOVE;
5103             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5104             break;
5105         case OPTION_CLEAR:
5106             act = g_new0(ImgBitmapAction, 1);
5107             act->act = BITMAP_CLEAR;
5108             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5109             break;
5110         case OPTION_ENABLE:
5111             act = g_new0(ImgBitmapAction, 1);
5112             act->act = BITMAP_ENABLE;
5113             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5114             break;
5115         case OPTION_DISABLE:
5116             act = g_new0(ImgBitmapAction, 1);
5117             act->act = BITMAP_DISABLE;
5118             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5119             break;
5120         case OPTION_MERGE:
5121             act = g_new0(ImgBitmapAction, 1);
5122             act->act = BITMAP_MERGE;
5123             act->src = optarg;
5124             QSIMPLEQ_INSERT_TAIL(&actions, act, next);
5125             merge = true;
5126             break;
5127         case 'b':
5128             src_filename = optarg;
5129             break;
5130         case 'F':
5131             src_fmt = optarg;
5132             break;
5133         case OPTION_OBJECT:
5134             user_creatable_process_cmdline(optarg);
5135             break;
5136         default:
5137             tryhelp(argv[0]);
5138         }
5139     }
5140 
5141     if (QSIMPLEQ_EMPTY(&actions)) {
5142         error_report("Need at least one of --add, --remove, --clear, "
5143                      "--enable, --disable, or --merge");
5144         goto out;
5145     }
5146 
5147     if (granularity && !add) {
5148         error_report("granularity only supported with --add");
5149         goto out;
5150     }
5151     if (src_fmt && !src_filename) {
5152         error_report("-F only supported with -b");
5153         goto out;
5154     }
5155     if (src_filename && !merge) {
5156         error_report("Merge bitmap source file only supported with "
5157                      "--merge");
5158         goto out;
5159     }
5160 
5161     if (optind != argc - 2) {
5162         error_report("Expecting filename and bitmap name");
5163         goto out;
5164     }
5165 
5166     filename = argv[optind];
5167     bitmap = argv[optind + 1];
5168 
5169     /*
5170      * No need to open backing chains; we will be manipulating bitmaps
5171      * directly in this image without reference to image contents.
5172      */
5173     blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR | BDRV_O_NO_BACKING,
5174                    false, false, false);
5175     if (!blk) {
5176         goto out;
5177     }
5178     bs = blk_bs(blk);
5179     if (src_filename) {
5180         src = img_open(false, src_filename, src_fmt, BDRV_O_NO_BACKING,
5181                        false, false, false);
5182         if (!src) {
5183             goto out;
5184         }
5185         src_bs = blk_bs(src);
5186     } else {
5187         src_bs = bs;
5188     }
5189 
5190     QSIMPLEQ_FOREACH_SAFE(act, &actions, next, act_next) {
5191         switch (act->act) {
5192         case BITMAP_ADD:
5193             qmp_block_dirty_bitmap_add(bs->node_name, bitmap,
5194                                        !!granularity, granularity, true, true,
5195                                        false, false, &err);
5196             op = "add";
5197             break;
5198         case BITMAP_REMOVE:
5199             qmp_block_dirty_bitmap_remove(bs->node_name, bitmap, &err);
5200             op = "remove";
5201             break;
5202         case BITMAP_CLEAR:
5203             qmp_block_dirty_bitmap_clear(bs->node_name, bitmap, &err);
5204             op = "clear";
5205             break;
5206         case BITMAP_ENABLE:
5207             qmp_block_dirty_bitmap_enable(bs->node_name, bitmap, &err);
5208             op = "enable";
5209             break;
5210         case BITMAP_DISABLE:
5211             qmp_block_dirty_bitmap_disable(bs->node_name, bitmap, &err);
5212             op = "disable";
5213             break;
5214         case BITMAP_MERGE:
5215             do_dirty_bitmap_merge(bs->node_name, bitmap, src_bs->node_name,
5216                                   act->src, &err);
5217             op = "merge";
5218             break;
5219         default:
5220             g_assert_not_reached();
5221         }
5222 
5223         if (err) {
5224             error_reportf_err(err, "Operation %s on bitmap %s failed: ",
5225                               op, bitmap);
5226             goto out;
5227         }
5228         g_free(act);
5229     }
5230 
5231     ret = 0;
5232 
5233  out:
5234     /*
5235      * Manually inactivate the images first because this way we can know whether
5236      * an error occurred. blk_unref() doesn't tell us about failures.
5237      */
5238     inactivate_ret = bdrv_inactivate_all();
5239     if (inactivate_ret < 0) {
5240         error_report("Error while closing the image: %s", strerror(-inactivate_ret));
5241         ret = 1;
5242     }
5243 
5244     blk_unref(src);
5245     blk_unref(blk);
5246     qemu_opts_del(opts);
5247     return ret;
5248 }
5249 
/*
 * Bit flags, one per "qemu-img dd" operand.  img_dd() ORs the flag of every
 * operand it has parsed into DdInfo.flags.
 */
#define C_BS      (1 << 0)
#define C_COUNT   (1 << 1)
#define C_IF      (1 << 2)
#define C_OF      (1 << 3)
#define C_SKIP    (1 << 4)
5255 
/* State of a "qemu-img dd" run that is not tied to one particular file. */
struct DdInfo {
    unsigned int flags; /* OR of the C_* bits of the operands parsed so far */
    int64_t count;      /* count= operand; img_dd() multiplies it by the block size */
};
5260 
/* Per-file state for one side (input or output) of "qemu-img dd". */
struct DdIo {
    int bsz;    /* Block size */
    char *filename; /* g_strdup()ed if=/of= operand; released by img_dd() */
    uint8_t *buf;   /* transfer buffer; img_dd() only allocates the input one */
    int64_t offset; /* skip= operand, counted in blocks of bsz bytes */
};
5267 
/* One entry of the table that maps a dd operand name to its parser. */
struct DdOpts {
    const char *name;   /* operand keyword, i.e. the text before '=' */
    /* Parser for the text after '='; img_dd() treats non-zero as failure. */
    int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *);
    unsigned int flag;  /* C_* bit ORed into DdInfo.flags after parsing */
};
5273 
img_dd_bs(const char * arg,struct DdIo * in,struct DdIo * out,struct DdInfo * dd)5274 static int img_dd_bs(const char *arg,
5275                      struct DdIo *in, struct DdIo *out,
5276                      struct DdInfo *dd)
5277 {
5278     int64_t res;
5279 
5280     res = cvtnum_full("bs", arg, true, 1, INT_MAX);
5281 
5282     if (res < 0) {
5283         return 1;
5284     }
5285     in->bsz = out->bsz = res;
5286 
5287     return 0;
5288 }
5289 
img_dd_count(const char * arg,struct DdIo * in,struct DdIo * out,struct DdInfo * dd)5290 static int img_dd_count(const char *arg,
5291                         struct DdIo *in, struct DdIo *out,
5292                         struct DdInfo *dd)
5293 {
5294     dd->count = cvtnum("count", arg, true);
5295 
5296     if (dd->count < 0) {
5297         return 1;
5298     }
5299 
5300     return 0;
5301 }
5302 
img_dd_if(const char * arg,struct DdIo * in,struct DdIo * out,struct DdInfo * dd)5303 static int img_dd_if(const char *arg,
5304                      struct DdIo *in, struct DdIo *out,
5305                      struct DdInfo *dd)
5306 {
5307     in->filename = g_strdup(arg);
5308 
5309     return 0;
5310 }
5311 
img_dd_of(const char * arg,struct DdIo * in,struct DdIo * out,struct DdInfo * dd)5312 static int img_dd_of(const char *arg,
5313                      struct DdIo *in, struct DdIo *out,
5314                      struct DdInfo *dd)
5315 {
5316     out->filename = g_strdup(arg);
5317 
5318     return 0;
5319 }
5320 
img_dd_skip(const char * arg,struct DdIo * in,struct DdIo * out,struct DdInfo * dd)5321 static int img_dd_skip(const char *arg,
5322                        struct DdIo *in, struct DdIo *out,
5323                        struct DdInfo *dd)
5324 {
5325     in->offset = cvtnum("skip", arg, true);
5326 
5327     if (in->offset < 0) {
5328         return 1;
5329     }
5330 
5331     return 0;
5332 }
5333 
img_dd(const img_cmd_t * ccmd,int argc,char ** argv)5334 static int img_dd(const img_cmd_t *ccmd, int argc, char **argv)
5335 {
5336     int ret = 0;
5337     char *arg = NULL;
5338     char *tmp;
5339     BlockDriver *drv = NULL, *proto_drv = NULL;
5340     BlockBackend *blk1 = NULL, *blk2 = NULL;
5341     QemuOpts *opts = NULL;
5342     QemuOptsList *create_opts = NULL;
5343     Error *local_err = NULL;
5344     bool image_opts = false;
5345     int c, i;
5346     const char *out_fmt = "raw";
5347     const char *fmt = NULL;
5348     int64_t size = 0;
5349     int64_t out_pos, in_pos;
5350     bool force_share = false;
5351     struct DdInfo dd = {
5352         .flags = 0,
5353         .count = 0,
5354     };
5355     struct DdIo in = {
5356         .bsz = 512, /* Block size is by default 512 bytes */
5357         .filename = NULL,
5358         .buf = NULL,
5359         .offset = 0
5360     };
5361     struct DdIo out = {
5362         .bsz = 512,
5363         .filename = NULL,
5364         .buf = NULL,
5365         .offset = 0
5366     };
5367 
5368     const struct DdOpts options[] = {
5369         { "bs", img_dd_bs, C_BS },
5370         { "count", img_dd_count, C_COUNT },
5371         { "if", img_dd_if, C_IF },
5372         { "of", img_dd_of, C_OF },
5373         { "skip", img_dd_skip, C_SKIP },
5374         { NULL, NULL, 0 }
5375     };
5376     const struct option long_options[] = {
5377         { "help", no_argument, 0, 'h'},
5378         { "format", required_argument, 0, 'f'},
5379         { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5380         { "output-format", required_argument, 0, 'O'},
5381         { "force-share", no_argument, 0, 'U'},
5382         { "object", required_argument, 0, OPTION_OBJECT},
5383         { 0, 0, 0, 0 }
5384     };
5385 
5386     while ((c = getopt_long(argc, argv, "hf:O:U", long_options, NULL))) {
5387         if (c == EOF) {
5388             break;
5389         }
5390         switch (c) {
5391         case 'h':
5392             cmd_help(ccmd, "[-f FMT|--image-opts] [-O OUTPUT_FMT] [-U]\n"
5393 "        [--object OBJDEF] [bs=BLOCK_SIZE] [count=BLOCKS] if=INPUT of=OUTPUT\n"
5394 ,
5395 "  -f, --format FMT\n"
5396 "     specify format for INPUT explicitly (default: probing is used)\n"
5397 "  --image-opts\n"
5398 "     treat INPUT as an option string (key=value,..), not a file name\n"
5399 "     (incompatible with -f|--format)\n"
5400 "  -O, --output-format OUTPUT_FMT\n"
5401 "     format of the OUTPUT (default: raw)\n"
5402 "  -U, --force-share\n"
5403 "     open images in shared mode for concurrent access\n"
5404 "  --object OBJDEF\n"
5405 "     defines QEMU user-creatable object\n"
5406 "  bs=BLOCK_SIZE[bKMGTP]\n"
5407 "     size of the I/O block, with optional multiplier suffix (powers of 1024)\n"
5408 "     (default: 512)\n"
5409 "  count=COUNT\n"
5410 "     number of blocks to convert (default whole INPUT)\n"
5411 "  if=INPUT\n"
5412 "     name of the file, or option string (key=value,..)\n"
5413 "     with --image-opts, to use for input\n"
5414 "  of=OUTPUT\n"
5415 "     output file name to create (will be overridden if alrady exists)\n"
5416 );
5417             break;
5418         case 'f':
5419             fmt = optarg;
5420             break;
5421         case OPTION_IMAGE_OPTS:
5422             image_opts = true;
5423             break;
5424         case 'O':
5425             out_fmt = optarg;
5426             break;
5427         case 'U':
5428             force_share = true;
5429             break;
5430         case OPTION_OBJECT:
5431             user_creatable_process_cmdline(optarg);
5432             break;
5433         default:
5434             tryhelp(argv[0]);
5435         }
5436     }
5437 
5438     for (i = optind; i < argc; i++) {
5439         int j;
5440         arg = g_strdup(argv[i]);
5441 
5442         tmp = strchr(arg, '=');
5443         if (tmp == NULL) {
5444             error_report("unrecognized operand %s", arg);
5445             ret = -1;
5446             goto out;
5447         }
5448 
5449         *tmp++ = '\0';
5450 
5451         for (j = 0; options[j].name != NULL; j++) {
5452             if (!strcmp(arg, options[j].name)) {
5453                 break;
5454             }
5455         }
5456         if (options[j].name == NULL) {
5457             error_report("unrecognized operand %s", arg);
5458             ret = -1;
5459             goto out;
5460         }
5461 
5462         if (options[j].f(tmp, &in, &out, &dd) != 0) {
5463             ret = -1;
5464             goto out;
5465         }
5466         dd.flags |= options[j].flag;
5467         g_free(arg);
5468         arg = NULL;
5469     }
5470 
5471     if (!(dd.flags & C_IF && dd.flags & C_OF)) {
5472         error_report("Must specify both input and output files");
5473         ret = -1;
5474         goto out;
5475     }
5476 
5477     blk1 = img_open(image_opts, in.filename, fmt, 0, false, false,
5478                     force_share);
5479 
5480     if (!blk1) {
5481         ret = -1;
5482         goto out;
5483     }
5484 
5485     drv = bdrv_find_format(out_fmt);
5486     if (!drv) {
5487         error_report("Unknown file format");
5488         ret = -1;
5489         goto out;
5490     }
5491     proto_drv = bdrv_find_protocol(out.filename, true, &local_err);
5492 
5493     if (!proto_drv) {
5494         error_report_err(local_err);
5495         ret = -1;
5496         goto out;
5497     }
5498     if (!drv->create_opts) {
5499         error_report("Format driver '%s' does not support image creation",
5500                      drv->format_name);
5501         ret = -1;
5502         goto out;
5503     }
5504     if (!proto_drv->create_opts) {
5505         error_report("Protocol driver '%s' does not support image creation",
5506                      proto_drv->format_name);
5507         ret = -1;
5508         goto out;
5509     }
5510     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5511     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
5512 
5513     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5514 
5515     size = blk_getlength(blk1);
5516     if (size < 0) {
5517         error_report("Failed to get size for '%s'", in.filename);
5518         ret = -1;
5519         goto out;
5520     }
5521 
5522     if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz &&
5523         dd.count * in.bsz < size) {
5524         size = dd.count * in.bsz;
5525     }
5526 
5527     /* Overflow means the specified offset is beyond input image's size */
5528     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5529                               size < in.bsz * in.offset)) {
5530         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort);
5531     } else {
5532         qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
5533                             size - in.bsz * in.offset, &error_abort);
5534     }
5535 
5536     ret = bdrv_create(drv, out.filename, opts, &local_err);
5537     if (ret < 0) {
5538         error_reportf_err(local_err,
5539                           "%s: error while creating output image: ",
5540                           out.filename);
5541         ret = -1;
5542         goto out;
5543     }
5544 
5545     /* TODO, we can't honour --image-opts for the target,
5546      * since it needs to be given in a format compatible
5547      * with the bdrv_create() call above which does not
5548      * support image-opts style.
5549      */
5550     blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
5551                          false, false, false);
5552 
5553     if (!blk2) {
5554         ret = -1;
5555         goto out;
5556     }
5557 
5558     if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz ||
5559                               size < in.offset * in.bsz)) {
5560         /* We give a warning if the skip option is bigger than the input
5561          * size and create an empty output disk image (i.e. like dd(1)).
5562          */
5563         error_report("%s: cannot skip to specified offset", in.filename);
5564         in_pos = size;
5565     } else {
5566         in_pos = in.offset * in.bsz;
5567     }
5568 
5569     in.buf = g_new(uint8_t, in.bsz);
5570 
5571     for (out_pos = 0; in_pos < size; ) {
5572         int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz;
5573 
5574         ret = blk_pread(blk1, in_pos, bytes, in.buf, 0);
5575         if (ret < 0) {
5576             error_report("error while reading from input image file: %s",
5577                          strerror(-ret));
5578             goto out;
5579         }
5580         in_pos += bytes;
5581 
5582         ret = blk_pwrite(blk2, out_pos, bytes, in.buf, 0);
5583         if (ret < 0) {
5584             error_report("error while writing to output image file: %s",
5585                          strerror(-ret));
5586             goto out;
5587         }
5588         out_pos += bytes;
5589     }
5590 
5591 out:
5592     g_free(arg);
5593     qemu_opts_del(opts);
5594     qemu_opts_free(create_opts);
5595     blk_unref(blk1);
5596     blk_unref(blk2);
5597     g_free(in.filename);
5598     g_free(out.filename);
5599     g_free(in.buf);
5600     g_free(out.buf);
5601 
5602     if (ret) {
5603         return 1;
5604     }
5605     return 0;
5606 }
5607 
/*
 * Print @info on stdout as pretty-printed JSON followed by a newline.
 *
 * @info is converted to a QObject with a QObject output visitor, which is
 * then serialized with qobject_to_json_pretty().
 */
static void dump_json_block_measure_info(BlockMeasureInfo *info)
{
    QObject *root;
    Visitor *visitor = qobject_output_visitor_new(&root);
    GString *json;

    visit_type_BlockMeasureInfo(visitor, NULL, &info, &error_abort);
    visit_complete(visitor, &root);

    json = qobject_to_json_pretty(root, true);
    assert(json != NULL);
    puts(json->str);

    qobject_unref(root);
    visit_free(visitor);
    g_string_free(json, true);
}
5623 
img_measure(const img_cmd_t * ccmd,int argc,char ** argv)5624 static int img_measure(const img_cmd_t *ccmd, int argc, char **argv)
5625 {
5626     OutputFormat output_format = OFORMAT_HUMAN;
5627     BlockBackend *in_blk = NULL;
5628     BlockDriver *drv;
5629     const char *filename = NULL;
5630     const char *fmt = NULL;
5631     const char *out_fmt = "raw";
5632     char *options = NULL;
5633     char *snapshot_name = NULL;
5634     bool force_share = false;
5635     QemuOpts *opts = NULL;
5636     QemuOpts *object_opts = NULL;
5637     QemuOpts *sn_opts = NULL;
5638     QemuOptsList *create_opts = NULL;
5639     bool image_opts = false;
5640     int64_t img_size = -1;
5641     BlockMeasureInfo *info = NULL;
5642     Error *local_err = NULL;
5643     int ret = 1;
5644     int c;
5645 
5646     static const struct option long_options[] = {
5647         {"help", no_argument, 0, 'h'},
5648         {"source-format", required_argument, 0, 'f'}, /* img_convert */
5649         {"format", required_argument, 0, 'f'},
5650         {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
5651         {"source-image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, /* img_convert */
5652         {"snapshot", required_argument, 0, 'l'},
5653         {"target-format", required_argument, 0, 'O'},
5654         {"target-format-options", required_argument, 0, 'o'}, /* img_convert */
5655         {"options", required_argument, 0, 'o'},
5656         {"force-share", no_argument, 0, 'U'},
5657         {"output", required_argument, 0, OPTION_OUTPUT},
5658         {"object", required_argument, 0, OPTION_OBJECT},
5659         {"size", required_argument, 0, 's'},
5660         {0, 0, 0, 0}
5661     };
5662 
5663     while ((c = getopt_long(argc, argv, "hf:l:O:o:Us:",
5664                             long_options, NULL)) != -1) {
5665         switch (c) {
5666         case 'h':
5667             cmd_help(ccmd, "[-f FMT|--image-opts] [-l SNAPSHOT]\n"
5668 "       [-O TARGET_FMT] [-o TARGET_FMT_OPTS] [--output human|json]\n"
5669 "       [--object OBJDEF] (--size SIZE | FILE)\n"
5670 ,
5671 "  -f, --format\n"
5672 "     specify format of FILE explicitly (default: probing is used)\n"
5673 "  --image-opts\n"
5674 "     indicates that FILE is a complete image specification\n"
5675 "     instead of a file name (incompatible with --format)\n"
5676 "  -l, --snapshot SNAPSHOT\n"
5677 "     use this snapshot in FILE as source\n"
5678 "  -O, --target-format TARGET_FMT\n"
5679 "     desired target/output image format (default: raw)\n"
5680 "  -o TARGET_FMT_OPTS\n"
5681 "     options specific to TARGET_FMT\n"
5682 "  --output human|json\n"
5683 "     output format (default: human)\n"
5684 "  -U, --force-share\n"
5685 "     open images in shared mode for concurrent access\n"
5686 "  --object OBJDEF\n"
5687 "     defines QEMU user-creatable object\n"
5688 "  -s, --size SIZE[bKMGTPE]\n"
5689 "     measure file size for given image size,\n"
5690 "     with optional multiplier suffix (powers of 1024)\n"
5691 "  FILE\n"
5692 "     measure file size required to convert from FILE (either a file name\n"
5693 "     or an option string (key=value,..) with --image-options)\n"
5694 );
5695             break;
5696         case 'f':
5697             fmt = optarg;
5698             break;
5699         case OPTION_IMAGE_OPTS:
5700             image_opts = true;
5701             break;
5702         case 'l':
5703             if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
5704                 sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
5705                                                   optarg, false);
5706                 if (!sn_opts) {
5707                     error_report("Failed in parsing snapshot param '%s'",
5708                                  optarg);
5709                     goto out;
5710                 }
5711             } else {
5712                 snapshot_name = optarg;
5713             }
5714             break;
5715         case 'O':
5716             out_fmt = optarg;
5717             break;
5718         case 'o':
5719             if (accumulate_options(&options, optarg) < 0) {
5720                 goto out;
5721             }
5722             break;
5723         case 'U':
5724             force_share = true;
5725             break;
5726         case OPTION_OUTPUT:
5727             output_format = parse_output_format(argv[0], optarg);
5728             break;
5729         case OPTION_OBJECT:
5730             user_creatable_process_cmdline(optarg);
5731             break;
5732         case 's':
5733             img_size = cvtnum("image size", optarg, true);
5734             if (img_size < 0) {
5735                 goto out;
5736             }
5737             break;
5738         default:
5739             tryhelp(argv[0]);
5740         }
5741     }
5742 
5743     if (argc - optind > 1) {
5744         error_report("At most one filename argument is allowed.");
5745         goto out;
5746     } else if (argc - optind == 1) {
5747         filename = argv[optind];
5748     }
5749 
5750     if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
5751         error_report("--image-opts, -f, and -l require a filename argument.");
5752         goto out;
5753     }
5754     if (filename && img_size != -1) {
5755         error_report("--size N cannot be used together with a filename.");
5756         goto out;
5757     }
5758     if (!filename && img_size == -1) {
5759         error_report("Either --size N or one filename must be specified.");
5760         goto out;
5761     }
5762 
5763     if (filename) {
5764         in_blk = img_open(image_opts, filename, fmt, 0,
5765                           false, false, force_share);
5766         if (!in_blk) {
5767             goto out;
5768         }
5769 
5770         if (sn_opts) {
5771             bdrv_snapshot_load_tmp(blk_bs(in_blk),
5772                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
5773                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
5774                     &local_err);
5775         } else if (snapshot_name != NULL) {
5776             bdrv_snapshot_load_tmp_by_id_or_name(blk_bs(in_blk),
5777                     snapshot_name, &local_err);
5778         }
5779         if (local_err) {
5780             error_reportf_err(local_err, "Failed to load snapshot: ");
5781             goto out;
5782         }
5783     }
5784 
5785     drv = bdrv_find_format(out_fmt);
5786     if (!drv) {
5787         error_report("Unknown file format '%s'", out_fmt);
5788         goto out;
5789     }
5790     if (!drv->create_opts) {
5791         error_report("Format driver '%s' does not support image creation",
5792                      drv->format_name);
5793         goto out;
5794     }
5795 
5796     create_opts = qemu_opts_append(create_opts, drv->create_opts);
5797     create_opts = qemu_opts_append(create_opts, bdrv_file.create_opts);
5798     opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5799     if (options) {
5800         if (!qemu_opts_do_parse(opts, options, NULL, &local_err)) {
5801             error_report_err(local_err);
5802             error_report("Invalid options for file format '%s'", out_fmt);
5803             goto out;
5804         }
5805     }
5806     if (img_size != -1) {
5807         qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
5808     }
5809 
5810     info = bdrv_measure(drv, opts, in_blk ? blk_bs(in_blk) : NULL, &local_err);
5811     if (local_err) {
5812         error_report_err(local_err);
5813         goto out;
5814     }
5815 
5816     if (output_format == OFORMAT_HUMAN) {
5817         printf("required size: %" PRIu64 "\n", info->required);
5818         printf("fully allocated size: %" PRIu64 "\n", info->fully_allocated);
5819         if (info->has_bitmaps) {
5820             printf("bitmaps size: %" PRIu64 "\n", info->bitmaps);
5821         }
5822     } else {
5823         dump_json_block_measure_info(info);
5824     }
5825 
5826     ret = 0;
5827 
5828 out:
5829     qapi_free_BlockMeasureInfo(info);
5830     qemu_opts_del(object_opts);
5831     qemu_opts_del(opts);
5832     qemu_opts_del(sn_opts);
5833     qemu_opts_free(create_opts);
5834     g_free(options);
5835     blk_unref(in_blk);
5836     return ret;
5837 }
5838 
5839 static const img_cmd_t img_cmds[] = {
5840     { "amend", img_amend,
5841       "Update format-specific options of the image" },
5842     { "bench", img_bench,
5843       "Run a simple image benchmark" },
5844     { "bitmap", img_bitmap,
5845       "Perform modifications of the persistent bitmap in the image" },
5846     { "check", img_check,
5847       "Check basic image integrity" },
5848     { "commit", img_commit,
5849       "Commit image to its backing file" },
5850     { "compare", img_compare,
5851       "Check if two images have the same contents" },
5852     { "convert", img_convert,
5853       "Copy one or more images to another with optional format conversion" },
5854     { "create", img_create,
5855       "Create and format a new image file" },
5856     { "dd", img_dd,
5857       "Copy input to output with optional format conversion" },
5858     { "info", img_info,
5859       "Display information about the image" },
5860     { "map", img_map,
5861       "Dump image metadata" },
5862     { "measure", img_measure,
5863       "Calculate the file size required for a new image" },
5864     { "rebase", img_rebase,
5865       "Change the backing file of the image" },
5866     { "resize", img_resize,
5867       "Resize the image" },
5868     { "snapshot", img_snapshot,
5869       "List or manipulate snapshots in the image" },
5870     { NULL, NULL, },
5871 };
5872 
/*
 * Print @name preceded by a space, wrapping the output line.
 *
 * @opaque points to an int holding the current output column.  When adding
 * the length of @name to it would exceed 75, a newline plus one space is
 * printed first and the column restarts at 1.  The column is then advanced
 * by the number of characters printf() reports for " name".
 */
static void format_print(void *opaque, const char *name)
{
    int *column = opaque;
    size_t projected = *column + strlen(name);

    if (projected > 75) {
        printf("\n ");
        *column = 1;
    }

    int written = printf(" %s", name);
    *column += written;
}
5882 
main(int argc,char ** argv)5883 int main(int argc, char **argv)
5884 {
5885     const img_cmd_t *cmd;
5886     const char *cmdname;
5887     int c;
5888     static const struct option long_options[] = {
5889         {"help", no_argument, 0, 'h'},
5890         {"version", no_argument, 0, 'V'},
5891         {"trace", required_argument, NULL, 'T'},
5892         {0, 0, 0, 0}
5893     };
5894 
5895 #ifdef CONFIG_POSIX
5896     signal(SIGPIPE, SIG_IGN);
5897 #endif
5898 
5899     socket_init();
5900     error_init(argv[0]);
5901     module_call_init(MODULE_INIT_TRACE);
5902     qemu_init_exec_dir(argv[0]);
5903 
5904     qemu_init_main_loop(&error_fatal);
5905 
5906     qcrypto_init(&error_fatal);
5907 
5908     module_call_init(MODULE_INIT_QOM);
5909     bdrv_init();
5910 
5911     qemu_add_opts(&qemu_source_opts);
5912     qemu_add_opts(&qemu_trace_opts);
5913 
5914     while ((c = getopt_long(argc, argv, "+hVT:", long_options, NULL)) != -1) {
5915         switch (c) {
5916         case 'h':
5917             printf(
5918 QEMU_IMG_VERSION
5919 "QEMU disk image utility.  Usage:\n"
5920 "\n"
5921 "  qemu-img [standard options] COMMAND [--help | command options]\n"
5922 "\n"
5923 "Standard options:\n"
5924 "  -h, --help\n"
5925 "     display this help and exit\n"
5926 "  -V, --version\n"
5927 "     display version info and exit\n"
5928 "  -T,--trace TRACE\n"
5929 "     specify tracing options:\n"
5930 "        [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
5931 "\n"
5932 "Recognized commands (run qemu-img COMMAND --help for command-specific help):\n\n");
5933             for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5934                 printf("  %s - %s\n", cmd->name, cmd->description);
5935             }
5936             printf("\nSupported image formats:\n");
5937             c = 99; /* force a newline */
5938             bdrv_iterate_format(format_print, &c, false);
5939             if (c) {
5940                 printf("\n");
5941             }
5942             printf("\n" QEMU_HELP_BOTTOM "\n");
5943             return 0;
5944         case 'V':
5945             printf(QEMU_IMG_VERSION);
5946             return 0;
5947         case 'T':
5948             trace_opt_parse(optarg);
5949             break;
5950         default:
5951             tryhelp(argv[0]);
5952         }
5953     }
5954 
5955     if (optind >= argc) {
5956         error_exit(argv[0], "Not enough arguments");
5957     }
5958 
5959     cmdname = argv[optind];
5960 
5961     if (!trace_init_backends()) {
5962         exit(1);
5963     }
5964     trace_init_file();
5965     qemu_set_log(LOG_TRACE, &error_fatal);
5966 
5967     /* find the command */
5968     for (cmd = img_cmds; cmd->name != NULL; cmd++) {
5969         if (!strcmp(cmdname, cmd->name)) {
5970             g_autofree char *argv0 = g_strdup_printf("%s %s", argv[0], cmdname);
5971             /* reset options and getopt processing (incl return order) */
5972             argv += optind;
5973             argc -= optind;
5974             qemu_reset_optind();
5975             argv[0] = argv0;
5976             return cmd->handler(cmd, argc, argv);
5977         }
5978     }
5979 
5980     /* not found */
5981     error_exit(argv[0], "Command not found: %s", cmdname);
5982 }
5983