1 /* 2 * Block driver for the QCOW version 2 format 3 * 4 * Copyright (c) 2004-2006 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include "qemu/osdep.h" 25 #include "qemu-common.h" 26 #include "block/block_int.h" 27 #include "sysemu/block-backend.h" 28 #include "qemu/module.h" 29 #include <zlib.h> 30 #include "block/qcow2.h" 31 #include "qemu/error-report.h" 32 #include "qapi/qmp/qerror.h" 33 #include "qapi/qmp/qbool.h" 34 #include "qapi/util.h" 35 #include "qapi/qmp/types.h" 36 #include "qapi-event.h" 37 #include "trace.h" 38 #include "qemu/option_int.h" 39 40 /* 41 Differences with QCOW: 42 43 - Support for multiple incremental snapshots. 44 - Memory management by reference counts. 45 - Clusters which have a reference count of one have the bit 46 QCOW_OFLAG_COPIED to optimize write performance. 47 - Size of compressed clusters is stored in sectors to reduce bit usage 48 in the cluster offsets. 49 - Support for storing additional data (such as the VM state) in the 50 snapshots. 51 - If a backing store is used, the cluster size is not constrained 52 (could be backported to QCOW). 53 - L2 tables have always a size of one cluster. 54 */ 55 56 57 typedef struct { 58 uint32_t magic; 59 uint32_t len; 60 } QEMU_PACKED QCowExtension; 61 62 #define QCOW2_EXT_MAGIC_END 0 63 #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA 64 #define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 65 66 static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) 67 { 68 const QCowHeader *cow_header = (const void *)buf; 69 70 if (buf_size >= sizeof(QCowHeader) && 71 be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 72 be32_to_cpu(cow_header->version) >= 2) 73 return 100; 74 else 75 return 0; 76 } 77 78 79 /* 80 * read qcow2 extension and fill bs 81 * start reading from start_offset 82 * finish reading upon magic of value 0 or when end_offset reached 83 * unknown magic is skipped (future extension this version knows nothing about) 84 * return 0 upon success, non-0 otherwise 85 */ 86 static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, 87 uint64_t end_offset, void **p_feature_table, 88 Error **errp) 89 { 90 BDRVQcow2State *s = bs->opaque; 91 QCowExtension ext; 92 uint64_t offset; 93 int ret; 94 95 #ifdef DEBUG_EXT 96 printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); 97 #endif 98 offset = start_offset; 99 while (offset < end_offset) { 100 101 #ifdef DEBUG_EXT 102 /* Sanity check */ 103 if (offset > s->cluster_size) 104 printf("qcow2_read_extension: suspicious offset %lu\n", offset); 105 106 printf("attempting to read extended header in offset %lu\n", offset); 107 #endif 108 109 ret = bdrv_pread(bs->file->bs, offset, &ext, sizeof(ext)); 110 if (ret < 0) { 111 error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: " 112 "pread fail from offset %" PRIu64, offset); 113 return 1; 114 } 115 be32_to_cpus(&ext.magic); 116 be32_to_cpus(&ext.len); 117 offset += sizeof(ext); 118 #ifdef DEBUG_EXT 119 printf("ext.magic = 0x%x\n", ext.magic); 120 #endif 121 if (offset > end_offset || ext.len > end_offset - offset) { 122 error_setg(errp, "Header extension too large"); 123 return -EINVAL; 124 } 125 126 switch (ext.magic) { 127 case QCOW2_EXT_MAGIC_END: 128 return 0; 129 130 case QCOW2_EXT_MAGIC_BACKING_FORMAT: 131 if (ext.len >= sizeof(bs->backing_format)) { 132 error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32 133 " too large (>=%zu)", ext.len, 134 sizeof(bs->backing_format)); 135 return 2; 136 } 137 ret = bdrv_pread(bs->file->bs, offset, bs->backing_format, ext.len); 138 if (ret < 0) { 139 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: " 140 "Could not read format name"); 141 return 3; 142 } 143 bs->backing_format[ext.len] = '\0'; 144 s->image_backing_format = g_strdup(bs->backing_format); 145 #ifdef DEBUG_EXT 146 printf("Qcow2: Got format extension %s\n", bs->backing_format); 147 #endif 148 break; 149 150 case QCOW2_EXT_MAGIC_FEATURE_TABLE: 151 if (p_feature_table != NULL) { 152 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); 153 ret = bdrv_pread(bs->file->bs, offset , feature_table, ext.len); 154 if (ret < 0) { 155 error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " 156 "Could not read table"); 157 return ret; 158 } 159 160 *p_feature_table = feature_table; 161 } 162 break; 163 164 default: 165 /* unknown magic - save it in case we need to rewrite the header */ 166 { 167 Qcow2UnknownHeaderExtension *uext; 168 169 uext = g_malloc0(sizeof(*uext) + ext.len); 170 uext->magic = ext.magic; 171 uext->len = ext.len; 172 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); 173 174 ret = bdrv_pread(bs->file->bs, offset , uext->data, uext->len); 175 if (ret < 0) { 176 error_setg_errno(errp, -ret, "ERROR: unknown extension: " 177 "Could not read data"); 178 return ret; 179 } 180 } 181 break; 182 } 183 184 offset += ((ext.len + 7) & ~7); 185 } 186 187 return 0; 188 } 189 190 static void cleanup_unknown_header_ext(BlockDriverState *bs) 191 { 192 BDRVQcow2State *s = bs->opaque; 193 Qcow2UnknownHeaderExtension *uext, *next; 194 195 QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { 196 QLIST_REMOVE(uext, next); 197 g_free(uext); 198 } 199 } 200 201 static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs, 202 Error **errp, const char *fmt, ...) 203 { 204 char msg[64]; 205 va_list ap; 206 207 va_start(ap, fmt); 208 vsnprintf(msg, sizeof(msg), fmt, ap); 209 va_end(ap); 210 211 error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, 212 bdrv_get_device_or_node_name(bs), "qcow2", msg); 213 } 214 215 static void report_unsupported_feature(BlockDriverState *bs, 216 Error **errp, Qcow2Feature *table, uint64_t mask) 217 { 218 char *features = g_strdup(""); 219 char *old; 220 221 while (table && table->name[0] != '\0') { 222 if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) { 223 if (mask & (1ULL << table->bit)) { 224 old = features; 225 features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "", 226 table->name); 227 g_free(old); 228 mask &= ~(1ULL << table->bit); 229 } 230 } 231 table++; 232 } 233 234 if (mask) { 235 old = features; 236 features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64, 237 old, *old ? ", " : "", mask); 238 g_free(old); 239 } 240 241 report_unsupported(bs, errp, "%s", features); 242 g_free(features); 243 } 244 245 /* 246 * Sets the dirty bit and flushes afterwards if necessary. 247 * 248 * The incompatible_features bit is only set if the image file header was 249 * updated successfully. Therefore it is not required to check the return 250 * value of this function. 251 */ 252 int qcow2_mark_dirty(BlockDriverState *bs) 253 { 254 BDRVQcow2State *s = bs->opaque; 255 uint64_t val; 256 int ret; 257 258 assert(s->qcow_version >= 3); 259 260 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 261 return 0; /* already dirty */ 262 } 263 264 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY); 265 ret = bdrv_pwrite(bs->file->bs, offsetof(QCowHeader, incompatible_features), 266 &val, sizeof(val)); 267 if (ret < 0) { 268 return ret; 269 } 270 ret = bdrv_flush(bs->file->bs); 271 if (ret < 0) { 272 return ret; 273 } 274 275 /* Only treat image as dirty if the header was updated successfully */ 276 s->incompatible_features |= QCOW2_INCOMPAT_DIRTY; 277 return 0; 278 } 279 280 /* 281 * Clears the dirty bit and flushes before if necessary. Only call this 282 * function when there are no pending requests, it does not guard against 283 * concurrent requests dirtying the image. 284 */ 285 static int qcow2_mark_clean(BlockDriverState *bs) 286 { 287 BDRVQcow2State *s = bs->opaque; 288 289 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 290 int ret; 291 292 s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY; 293 294 ret = bdrv_flush(bs); 295 if (ret < 0) { 296 return ret; 297 } 298 299 return qcow2_update_header(bs); 300 } 301 return 0; 302 } 303 304 /* 305 * Marks the image as corrupt. 306 */ 307 int qcow2_mark_corrupt(BlockDriverState *bs) 308 { 309 BDRVQcow2State *s = bs->opaque; 310 311 s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT; 312 return qcow2_update_header(bs); 313 } 314 315 /* 316 * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes 317 * before if necessary. 318 */ 319 int qcow2_mark_consistent(BlockDriverState *bs) 320 { 321 BDRVQcow2State *s = bs->opaque; 322 323 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 324 int ret = bdrv_flush(bs); 325 if (ret < 0) { 326 return ret; 327 } 328 329 s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT; 330 return qcow2_update_header(bs); 331 } 332 return 0; 333 } 334 335 static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result, 336 BdrvCheckMode fix) 337 { 338 int ret = qcow2_check_refcounts(bs, result, fix); 339 if (ret < 0) { 340 return ret; 341 } 342 343 if (fix && result->check_errors == 0 && result->corruptions == 0) { 344 ret = qcow2_mark_clean(bs); 345 if (ret < 0) { 346 return ret; 347 } 348 return qcow2_mark_consistent(bs); 349 } 350 return ret; 351 } 352 353 static int validate_table_offset(BlockDriverState *bs, uint64_t offset, 354 uint64_t entries, size_t entry_len) 355 { 356 BDRVQcow2State *s = bs->opaque; 357 uint64_t size; 358 359 /* Use signed INT64_MAX as the maximum even for uint64_t header fields, 360 * because values will be passed to qemu functions taking int64_t. */ 361 if (entries > INT64_MAX / entry_len) { 362 return -EINVAL; 363 } 364 365 size = entries * entry_len; 366 367 if (INT64_MAX - size < offset) { 368 return -EINVAL; 369 } 370 371 /* Tables must be cluster aligned */ 372 if (offset & (s->cluster_size - 1)) { 373 return -EINVAL; 374 } 375 376 return 0; 377 } 378 379 static QemuOptsList qcow2_runtime_opts = { 380 .name = "qcow2", 381 .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head), 382 .desc = { 383 { 384 .name = QCOW2_OPT_LAZY_REFCOUNTS, 385 .type = QEMU_OPT_BOOL, 386 .help = "Postpone refcount updates", 387 }, 388 { 389 .name = QCOW2_OPT_DISCARD_REQUEST, 390 .type = QEMU_OPT_BOOL, 391 .help = "Pass guest discard requests to the layer below", 392 }, 393 { 394 .name = QCOW2_OPT_DISCARD_SNAPSHOT, 395 .type = QEMU_OPT_BOOL, 396 .help = "Generate discard requests when snapshot related space " 397 "is freed", 398 }, 399 { 400 .name = QCOW2_OPT_DISCARD_OTHER, 401 .type = QEMU_OPT_BOOL, 402 .help = "Generate discard requests when other clusters are freed", 403 }, 404 { 405 .name = QCOW2_OPT_OVERLAP, 406 .type = QEMU_OPT_STRING, 407 .help = "Selects which overlap checks to perform from a range of " 408 "templates (none, constant, cached, all)", 409 }, 410 { 411 .name = QCOW2_OPT_OVERLAP_TEMPLATE, 412 .type = QEMU_OPT_STRING, 413 .help = "Selects which overlap checks to perform from a range of " 414 "templates (none, constant, cached, all)", 415 }, 416 { 417 .name = QCOW2_OPT_OVERLAP_MAIN_HEADER, 418 .type = QEMU_OPT_BOOL, 419 .help = "Check for unintended writes into the main qcow2 header", 420 }, 421 { 422 .name = QCOW2_OPT_OVERLAP_ACTIVE_L1, 423 .type = QEMU_OPT_BOOL, 424 .help = "Check for unintended writes into the active L1 table", 425 }, 426 { 427 .name = QCOW2_OPT_OVERLAP_ACTIVE_L2, 428 .type = QEMU_OPT_BOOL, 429 .help = "Check for unintended writes into an active L2 table", 430 }, 431 { 432 .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 433 .type = QEMU_OPT_BOOL, 434 .help = "Check for unintended writes into the refcount table", 435 }, 436 { 437 .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, 438 .type = QEMU_OPT_BOOL, 439 .help = "Check for unintended writes into a refcount block", 440 }, 441 { 442 .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, 443 .type = QEMU_OPT_BOOL, 444 .help = "Check for unintended writes into the snapshot table", 445 }, 446 { 447 .name = QCOW2_OPT_OVERLAP_INACTIVE_L1, 448 .type = QEMU_OPT_BOOL, 449 .help = "Check for unintended writes into an inactive L1 table", 450 }, 451 { 452 .name = QCOW2_OPT_OVERLAP_INACTIVE_L2, 453 .type = QEMU_OPT_BOOL, 454 .help = "Check for unintended writes into an inactive L2 table", 455 }, 456 { 457 .name = QCOW2_OPT_CACHE_SIZE, 458 .type = QEMU_OPT_SIZE, 459 .help = "Maximum combined metadata (L2 tables and refcount blocks) " 460 "cache size", 461 }, 462 { 463 .name = QCOW2_OPT_L2_CACHE_SIZE, 464 .type = QEMU_OPT_SIZE, 465 .help = "Maximum L2 table cache size", 466 }, 467 { 468 .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE, 469 .type = QEMU_OPT_SIZE, 470 .help = "Maximum refcount block cache size", 471 }, 472 { 473 .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL, 474 .type = QEMU_OPT_NUMBER, 475 .help = "Clean unused cache entries after this time (in seconds)", 476 }, 477 { /* end of list */ } 478 }, 479 }; 480 481 static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { 482 [QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER, 483 [QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1, 484 [QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2, 485 [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 486 [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, 487 [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, 488 [QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1, 489 [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, 490 }; 491 492 static void cache_clean_timer_cb(void *opaque) 493 { 494 BlockDriverState *bs = opaque; 495 BDRVQcow2State *s = bs->opaque; 496 qcow2_cache_clean_unused(bs, s->l2_table_cache); 497 qcow2_cache_clean_unused(bs, s->refcount_block_cache); 498 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 499 (int64_t) s->cache_clean_interval * 1000); 500 } 501 502 static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) 503 { 504 BDRVQcow2State *s = bs->opaque; 505 if (s->cache_clean_interval > 0) { 506 s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL, 507 SCALE_MS, cache_clean_timer_cb, 508 bs); 509 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 510 (int64_t) s->cache_clean_interval * 1000); 511 } 512 } 513 514 static void cache_clean_timer_del(BlockDriverState *bs) 515 { 516 BDRVQcow2State *s = bs->opaque; 517 if (s->cache_clean_timer) { 518 timer_del(s->cache_clean_timer); 519 timer_free(s->cache_clean_timer); 520 s->cache_clean_timer = NULL; 521 } 522 } 523 524 static void qcow2_detach_aio_context(BlockDriverState *bs) 525 { 526 cache_clean_timer_del(bs); 527 } 528 529 static void qcow2_attach_aio_context(BlockDriverState *bs, 530 AioContext *new_context) 531 { 532 cache_clean_timer_init(bs, new_context); 533 } 534 535 static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, 536 uint64_t *l2_cache_size, 537 uint64_t *refcount_cache_size, Error **errp) 538 { 539 BDRVQcow2State *s = bs->opaque; 540 uint64_t combined_cache_size; 541 bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; 542 543 combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); 544 l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE); 545 refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 546 547 combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0); 548 *l2_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 0); 549 *refcount_cache_size = qemu_opt_get_size(opts, 550 QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0); 551 552 if (combined_cache_size_set) { 553 if (l2_cache_size_set && refcount_cache_size_set) { 554 error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE 555 " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set " 556 "the same time"); 557 return; 558 } else if (*l2_cache_size > combined_cache_size) { 559 error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed " 560 QCOW2_OPT_CACHE_SIZE); 561 return; 562 } else if (*refcount_cache_size > combined_cache_size) { 563 error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed " 564 QCOW2_OPT_CACHE_SIZE); 565 return; 566 } 567 568 if (l2_cache_size_set) { 569 *refcount_cache_size = combined_cache_size - *l2_cache_size; 570 } else if (refcount_cache_size_set) { 571 *l2_cache_size = combined_cache_size - *refcount_cache_size; 572 } else { 573 *refcount_cache_size = combined_cache_size 574 / (DEFAULT_L2_REFCOUNT_SIZE_RATIO + 1); 575 *l2_cache_size = combined_cache_size - *refcount_cache_size; 576 } 577 } else { 578 if (!l2_cache_size_set && !refcount_cache_size_set) { 579 *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE, 580 (uint64_t)DEFAULT_L2_CACHE_CLUSTERS 581 * s->cluster_size); 582 *refcount_cache_size = *l2_cache_size 583 / DEFAULT_L2_REFCOUNT_SIZE_RATIO; 584 } else if (!l2_cache_size_set) { 585 *l2_cache_size = *refcount_cache_size 586 * DEFAULT_L2_REFCOUNT_SIZE_RATIO; 587 } else if (!refcount_cache_size_set) { 588 *refcount_cache_size = *l2_cache_size 589 / DEFAULT_L2_REFCOUNT_SIZE_RATIO; 590 } 591 } 592 } 593 594 typedef struct Qcow2ReopenState { 595 Qcow2Cache *l2_table_cache; 596 Qcow2Cache *refcount_block_cache; 597 bool use_lazy_refcounts; 598 int overlap_check; 599 bool discard_passthrough[QCOW2_DISCARD_MAX]; 600 uint64_t cache_clean_interval; 601 } Qcow2ReopenState; 602 603 static int qcow2_update_options_prepare(BlockDriverState *bs, 604 Qcow2ReopenState *r, 605 QDict *options, int flags, 606 Error **errp) 607 { 608 BDRVQcow2State *s = bs->opaque; 609 QemuOpts *opts = NULL; 610 const char *opt_overlap_check, *opt_overlap_check_template; 611 int overlap_check_template = 0; 612 uint64_t l2_cache_size, refcount_cache_size; 613 int i; 614 Error *local_err = NULL; 615 int ret; 616 617 opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort); 618 qemu_opts_absorb_qdict(opts, options, &local_err); 619 if (local_err) { 620 error_propagate(errp, local_err); 621 ret = -EINVAL; 622 goto fail; 623 } 624 625 /* get L2 table/refcount block cache size from command line options */ 626 read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size, 627 &local_err); 628 if (local_err) { 629 error_propagate(errp, local_err); 630 ret = -EINVAL; 631 goto fail; 632 } 633 634 l2_cache_size /= s->cluster_size; 635 if (l2_cache_size < MIN_L2_CACHE_SIZE) { 636 l2_cache_size = MIN_L2_CACHE_SIZE; 637 } 638 if (l2_cache_size > INT_MAX) { 639 error_setg(errp, "L2 cache size too big"); 640 ret = -EINVAL; 641 goto fail; 642 } 643 644 refcount_cache_size /= s->cluster_size; 645 if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) { 646 refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE; 647 } 648 if (refcount_cache_size > INT_MAX) { 649 error_setg(errp, "Refcount cache size too big"); 650 ret = -EINVAL; 651 goto fail; 652 } 653 654 /* alloc new L2 table/refcount block cache, flush old one */ 655 if (s->l2_table_cache) { 656 ret = qcow2_cache_flush(bs, s->l2_table_cache); 657 if (ret) { 658 error_setg_errno(errp, -ret, "Failed to flush the L2 table cache"); 659 goto fail; 660 } 661 } 662 663 if (s->refcount_block_cache) { 664 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 665 if (ret) { 666 error_setg_errno(errp, -ret, 667 "Failed to flush the refcount block cache"); 668 goto fail; 669 } 670 } 671 672 r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size); 673 r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size); 674 if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) { 675 error_setg(errp, "Could not allocate metadata caches"); 676 ret = -ENOMEM; 677 goto fail; 678 } 679 680 /* New interval for cache cleanup timer */ 681 r->cache_clean_interval = 682 qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 683 s->cache_clean_interval); 684 if (r->cache_clean_interval > UINT_MAX) { 685 error_setg(errp, "Cache clean interval too big"); 686 ret = -EINVAL; 687 goto fail; 688 } 689 690 /* lazy-refcounts; flush if going from enabled to disabled */ 691 r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS, 692 (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)); 693 if (r->use_lazy_refcounts && s->qcow_version < 3) { 694 error_setg(errp, "Lazy refcounts require a qcow2 image with at least " 695 "qemu 1.1 compatibility level"); 696 ret = -EINVAL; 697 goto fail; 698 } 699 700 if (s->use_lazy_refcounts && !r->use_lazy_refcounts) { 701 ret = qcow2_mark_clean(bs); 702 if (ret < 0) { 703 error_setg_errno(errp, -ret, "Failed to disable lazy refcounts"); 704 goto fail; 705 } 706 } 707 708 /* Overlap check options */ 709 opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP); 710 opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE); 711 if (opt_overlap_check_template && opt_overlap_check && 712 strcmp(opt_overlap_check_template, opt_overlap_check)) 713 { 714 error_setg(errp, "Conflicting values for qcow2 options '" 715 QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE 716 "' ('%s')", opt_overlap_check, opt_overlap_check_template); 717 ret = -EINVAL; 718 goto fail; 719 } 720 if (!opt_overlap_check) { 721 opt_overlap_check = opt_overlap_check_template ?: "cached"; 722 } 723 724 if (!strcmp(opt_overlap_check, "none")) { 725 overlap_check_template = 0; 726 } else if (!strcmp(opt_overlap_check, "constant")) { 727 overlap_check_template = QCOW2_OL_CONSTANT; 728 } else if (!strcmp(opt_overlap_check, "cached")) { 729 overlap_check_template = QCOW2_OL_CACHED; 730 } else if (!strcmp(opt_overlap_check, "all")) { 731 overlap_check_template = QCOW2_OL_ALL; 732 } else { 733 error_setg(errp, "Unsupported value '%s' for qcow2 option " 734 "'overlap-check'. Allowed are any of the following: " 735 "none, constant, cached, all", opt_overlap_check); 736 ret = -EINVAL; 737 goto fail; 738 } 739 740 r->overlap_check = 0; 741 for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) { 742 /* overlap-check defines a template bitmask, but every flag may be 743 * overwritten through the associated boolean option */ 744 r->overlap_check |= 745 qemu_opt_get_bool(opts, overlap_bool_option_names[i], 746 overlap_check_template & (1 << i)) << i; 747 } 748 749 r->discard_passthrough[QCOW2_DISCARD_NEVER] = false; 750 r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true; 751 r->discard_passthrough[QCOW2_DISCARD_REQUEST] = 752 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST, 753 flags & BDRV_O_UNMAP); 754 r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] = 755 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true); 756 r->discard_passthrough[QCOW2_DISCARD_OTHER] = 757 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); 758 759 ret = 0; 760 fail: 761 qemu_opts_del(opts); 762 opts = NULL; 763 return ret; 764 } 765 766 static void qcow2_update_options_commit(BlockDriverState *bs, 767 Qcow2ReopenState *r) 768 { 769 BDRVQcow2State *s = bs->opaque; 770 int i; 771 772 if (s->l2_table_cache) { 773 qcow2_cache_destroy(bs, s->l2_table_cache); 774 } 775 if (s->refcount_block_cache) { 776 qcow2_cache_destroy(bs, s->refcount_block_cache); 777 } 778 s->l2_table_cache = r->l2_table_cache; 779 s->refcount_block_cache = r->refcount_block_cache; 780 781 s->overlap_check = r->overlap_check; 782 s->use_lazy_refcounts = r->use_lazy_refcounts; 783 784 for (i = 0; i < QCOW2_DISCARD_MAX; i++) { 785 s->discard_passthrough[i] = r->discard_passthrough[i]; 786 } 787 788 if (s->cache_clean_interval != r->cache_clean_interval) { 789 cache_clean_timer_del(bs); 790 s->cache_clean_interval = r->cache_clean_interval; 791 cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); 792 } 793 } 794 795 static void qcow2_update_options_abort(BlockDriverState *bs, 796 Qcow2ReopenState *r) 797 { 798 if (r->l2_table_cache) { 799 qcow2_cache_destroy(bs, r->l2_table_cache); 800 } 801 if (r->refcount_block_cache) { 802 qcow2_cache_destroy(bs, r->refcount_block_cache); 803 } 804 } 805 806 static int qcow2_update_options(BlockDriverState *bs, QDict *options, 807 int flags, Error **errp) 808 { 809 Qcow2ReopenState r = {}; 810 int ret; 811 812 ret = qcow2_update_options_prepare(bs, &r, options, flags, errp); 813 if (ret >= 0) { 814 qcow2_update_options_commit(bs, &r); 815 } else { 816 qcow2_update_options_abort(bs, &r); 817 } 818 819 return ret; 820 } 821 822 static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, 823 Error **errp) 824 { 825 BDRVQcow2State *s = bs->opaque; 826 unsigned int len, i; 827 int ret = 0; 828 QCowHeader header; 829 Error *local_err = NULL; 830 uint64_t ext_end; 831 uint64_t l1_vm_state_index; 832 833 ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header)); 834 if (ret < 0) { 835 error_setg_errno(errp, -ret, "Could not read qcow2 header"); 836 goto fail; 837 } 838 be32_to_cpus(&header.magic); 839 be32_to_cpus(&header.version); 840 be64_to_cpus(&header.backing_file_offset); 841 be32_to_cpus(&header.backing_file_size); 842 be64_to_cpus(&header.size); 843 be32_to_cpus(&header.cluster_bits); 844 be32_to_cpus(&header.crypt_method); 845 be64_to_cpus(&header.l1_table_offset); 846 be32_to_cpus(&header.l1_size); 847 be64_to_cpus(&header.refcount_table_offset); 848 be32_to_cpus(&header.refcount_table_clusters); 849 be64_to_cpus(&header.snapshots_offset); 850 be32_to_cpus(&header.nb_snapshots); 851 852 if (header.magic != QCOW_MAGIC) { 853 error_setg(errp, "Image is not in qcow2 format"); 854 ret = -EINVAL; 855 goto fail; 856 } 857 if (header.version < 2 || header.version > 3) { 858 report_unsupported(bs, errp, "QCOW version %" PRIu32, header.version); 859 ret = -ENOTSUP; 860 goto fail; 861 } 862 863 s->qcow_version = header.version; 864 865 /* Initialise cluster size */ 866 if (header.cluster_bits < MIN_CLUSTER_BITS || 867 header.cluster_bits > MAX_CLUSTER_BITS) { 868 error_setg(errp, "Unsupported cluster size: 2^%" PRIu32, 869 header.cluster_bits); 870 ret = -EINVAL; 871 goto fail; 872 } 873 874 s->cluster_bits = header.cluster_bits; 875 s->cluster_size = 1 << s->cluster_bits; 876 s->cluster_sectors = 1 << (s->cluster_bits - 9); 877 878 /* Initialise version 3 header fields */ 879 if (header.version == 2) { 880 header.incompatible_features = 0; 881 header.compatible_features = 0; 882 header.autoclear_features = 0; 883 header.refcount_order = 4; 884 header.header_length = 72; 885 } else { 886 be64_to_cpus(&header.incompatible_features); 887 be64_to_cpus(&header.compatible_features); 888 be64_to_cpus(&header.autoclear_features); 889 be32_to_cpus(&header.refcount_order); 890 be32_to_cpus(&header.header_length); 891 892 if (header.header_length < 104) { 893 error_setg(errp, "qcow2 header too short"); 894 ret = -EINVAL; 895 goto fail; 896 } 897 } 898 899 if (header.header_length > s->cluster_size) { 900 error_setg(errp, "qcow2 header exceeds cluster size"); 901 ret = -EINVAL; 902 goto fail; 903 } 904 905 if (header.header_length > sizeof(header)) { 906 s->unknown_header_fields_size = header.header_length - sizeof(header); 907 s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); 908 ret = bdrv_pread(bs->file->bs, sizeof(header), s->unknown_header_fields, 909 s->unknown_header_fields_size); 910 if (ret < 0) { 911 error_setg_errno(errp, -ret, "Could not read unknown qcow2 header " 912 "fields"); 913 goto fail; 914 } 915 } 916 917 if (header.backing_file_offset > s->cluster_size) { 918 error_setg(errp, "Invalid backing file offset"); 919 ret = -EINVAL; 920 goto fail; 921 } 922 923 if (header.backing_file_offset) { 924 ext_end = header.backing_file_offset; 925 } else { 926 ext_end = 1 << header.cluster_bits; 927 } 928 929 /* Handle feature bits */ 930 s->incompatible_features = header.incompatible_features; 931 s->compatible_features = header.compatible_features; 932 s->autoclear_features = header.autoclear_features; 933 934 if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) { 935 void *feature_table = NULL; 936 qcow2_read_extensions(bs, header.header_length, ext_end, 937 &feature_table, NULL); 938 report_unsupported_feature(bs, errp, feature_table, 939 s->incompatible_features & 940 ~QCOW2_INCOMPAT_MASK); 941 ret = -ENOTSUP; 942 g_free(feature_table); 943 goto fail; 944 } 945 946 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 947 /* Corrupt images may not be written to unless they are being repaired 948 */ 949 if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { 950 error_setg(errp, "qcow2: Image is corrupt; cannot be opened " 951 "read/write"); 952 ret = -EACCES; 953 goto fail; 954 } 955 } 956 957 /* Check support for various header values */ 958 if (header.refcount_order > 6) { 959 error_setg(errp, "Reference count entry width too large; may not " 960 "exceed 64 bits"); 961 ret = -EINVAL; 962 goto fail; 963 } 964 s->refcount_order = header.refcount_order; 965 s->refcount_bits = 1 << s->refcount_order; 966 s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); 967 s->refcount_max += s->refcount_max - 1; 968 969 if (header.crypt_method > QCOW_CRYPT_AES) { 970 error_setg(errp, "Unsupported encryption method: %" PRIu32, 971 header.crypt_method); 972 ret = -EINVAL; 973 goto fail; 974 } 975 if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) { 976 error_setg(errp, "AES cipher not available"); 977 ret = -EINVAL; 978 goto fail; 979 } 980 s->crypt_method_header = header.crypt_method; 981 if (s->crypt_method_header) { 982 bs->encrypted = 1; 983 } 984 985 s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ 986 s->l2_size = 1 << s->l2_bits; 987 /* 2^(s->refcount_order - 3) is the refcount width in bytes */ 988 s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3); 989 s->refcount_block_size = 1 << s->refcount_block_bits; 990 bs->total_sectors = header.size / 512; 991 s->csize_shift = (62 - (s->cluster_bits - 8)); 992 s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; 993 s->cluster_offset_mask = (1LL << s->csize_shift) - 1; 994 995 s->refcount_table_offset = header.refcount_table_offset; 996 s->refcount_table_size = 997 header.refcount_table_clusters << (s->cluster_bits - 3); 998 999 if (header.refcount_table_clusters > qcow2_max_refcount_clusters(s)) { 1000 error_setg(errp, "Reference count table too large"); 1001 ret = -EINVAL; 1002 goto fail; 1003 } 1004 1005 ret = validate_table_offset(bs, s->refcount_table_offset, 1006 s->refcount_table_size, sizeof(uint64_t)); 1007 if (ret < 0) { 1008 error_setg(errp, "Invalid reference count table offset"); 1009 goto fail; 1010 } 1011 1012 /* Snapshot table offset/length */ 1013 if (header.nb_snapshots > QCOW_MAX_SNAPSHOTS) { 1014 error_setg(errp, "Too many snapshots"); 1015 ret = -EINVAL; 1016 goto fail; 1017 } 1018 1019 ret = validate_table_offset(bs, header.snapshots_offset, 1020 header.nb_snapshots, 1021 sizeof(QCowSnapshotHeader)); 1022 if (ret < 0) { 1023 error_setg(errp, "Invalid snapshot table offset"); 1024 goto fail; 1025 } 1026 1027 /* read the level 1 table */ 1028 if (header.l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) { 1029 error_setg(errp, "Active L1 table too large"); 1030 ret = -EFBIG; 1031 goto fail; 1032 } 1033 s->l1_size = header.l1_size; 1034 1035 l1_vm_state_index = size_to_l1(s, header.size); 1036 if (l1_vm_state_index > INT_MAX) { 1037 error_setg(errp, "Image is too big"); 1038 ret = -EFBIG; 1039 goto fail; 1040 } 1041 s->l1_vm_state_index = l1_vm_state_index; 1042 1043 /* the L1 table must contain at least enough entries to put 1044 header.size bytes */ 1045 if (s->l1_size < s->l1_vm_state_index) { 1046 error_setg(errp, "L1 table is too small"); 1047 ret = -EINVAL; 1048 goto fail; 1049 } 1050 1051 ret = validate_table_offset(bs, header.l1_table_offset, 1052 header.l1_size, sizeof(uint64_t)); 1053 if (ret < 0) { 1054 error_setg(errp, "Invalid L1 table offset"); 1055 goto fail; 1056 } 1057 s->l1_table_offset = header.l1_table_offset; 1058 1059 1060 if (s->l1_size > 0) { 1061 s->l1_table = qemu_try_blockalign(bs->file->bs, 1062 align_offset(s->l1_size * sizeof(uint64_t), 512)); 1063 if (s->l1_table == NULL) { 1064 error_setg(errp, "Could not allocate L1 table"); 1065 ret = -ENOMEM; 1066 goto fail; 1067 } 1068 ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table, 1069 s->l1_size * sizeof(uint64_t)); 1070 if (ret < 0) { 1071 error_setg_errno(errp, -ret, "Could not read L1 table"); 1072 goto fail; 1073 } 1074 for(i = 0;i < s->l1_size; i++) { 1075 be64_to_cpus(&s->l1_table[i]); 1076 } 1077 } 1078 1079 /* Parse driver-specific options */ 1080 ret = qcow2_update_options(bs, options, flags, errp); 1081 if (ret < 0) { 1082 goto fail; 1083 } 1084 1085 s->cluster_cache = g_malloc(s->cluster_size); 1086 /* one more sector for decompressed data alignment */ 1087 s->cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS 1088 * s->cluster_size + 512); 1089 if (s->cluster_data == NULL) { 1090 error_setg(errp, "Could not allocate temporary cluster buffer"); 1091 ret = -ENOMEM; 1092 goto fail; 1093 } 1094 1095 s->cluster_cache_offset = -1; 1096 s->flags = flags; 1097 1098 ret = qcow2_refcount_init(bs); 1099 if (ret != 0) { 1100 error_setg_errno(errp, -ret, "Could not initialize refcount handling"); 1101 goto fail; 1102 } 1103 1104 QLIST_INIT(&s->cluster_allocs); 1105 QTAILQ_INIT(&s->discards); 1106 1107 /* read qcow2 extensions */ 1108 if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL, 1109 &local_err)) { 1110 error_propagate(errp, local_err); 1111 ret = -EINVAL; 1112 goto fail; 1113 } 1114 1115 /* read the backing file name */ 1116 if (header.backing_file_offset != 0) { 1117 len = header.backing_file_size; 1118 if (len > MIN(1023, s->cluster_size - header.backing_file_offset) || 1119 len >= sizeof(bs->backing_file)) { 1120 error_setg(errp, "Backing file name too long"); 1121 ret = -EINVAL; 1122 goto fail; 1123 } 1124 ret = bdrv_pread(bs->file->bs, header.backing_file_offset, 1125 bs->backing_file, len); 1126 if (ret < 0) { 1127 error_setg_errno(errp, -ret, "Could not read backing file name"); 1128 goto fail; 1129 } 1130 bs->backing_file[len] = '\0'; 1131 s->image_backing_file = g_strdup(bs->backing_file); 1132 } 1133 1134 /* Internal snapshots */ 1135 s->snapshots_offset = header.snapshots_offset; 1136 s->nb_snapshots = header.nb_snapshots; 1137 1138 ret = qcow2_read_snapshots(bs); 1139 if (ret < 0) { 1140 error_setg_errno(errp, -ret, "Could not read snapshots"); 1141 goto fail; 1142 } 1143 1144 /* Clear unknown autoclear feature bits */ 1145 if (!bs->read_only && !(flags & BDRV_O_INACTIVE) && s->autoclear_features) { 1146 s->autoclear_features = 0; 1147 ret = qcow2_update_header(bs); 1148 if (ret < 0) { 1149 error_setg_errno(errp, -ret, "Could not update qcow2 header"); 1150 goto fail; 1151 } 1152 } 1153 1154 /* Initialise locks */ 1155 qemu_co_mutex_init(&s->lock); 1156 1157 /* Repair image if dirty */ 1158 if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && 1159 (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { 1160 BdrvCheckResult result = {0}; 1161 1162 ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); 1163 if (ret < 0) { 1164 error_setg_errno(errp, -ret, "Could not repair dirty image"); 1165 goto fail; 1166 } 1167 } 1168 1169 #ifdef DEBUG_ALLOC 1170 { 1171 BdrvCheckResult result = {0}; 1172 qcow2_check_refcounts(bs, &result, 0); 1173 } 1174 #endif 1175 return ret; 1176 1177 fail: 1178 g_free(s->unknown_header_fields); 1179 cleanup_unknown_header_ext(bs); 1180 qcow2_free_snapshots(bs); 1181 qcow2_refcount_close(bs); 1182 qemu_vfree(s->l1_table); 1183 /* else pre-write overlap checks in cache_destroy may crash */ 1184 s->l1_table = NULL; 1185 cache_clean_timer_del(bs); 1186 if (s->l2_table_cache) { 1187 qcow2_cache_destroy(bs, s->l2_table_cache); 1188 } 1189 if (s->refcount_block_cache) { 1190 qcow2_cache_destroy(bs, s->refcount_block_cache); 1191 } 1192 g_free(s->cluster_cache); 1193 qemu_vfree(s->cluster_data); 1194 return ret; 1195 } 1196 1197 static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) 1198 { 1199 BDRVQcow2State *s = bs->opaque; 1200 1201 bs->bl.write_zeroes_alignment = s->cluster_sectors; 1202 } 1203 1204 static int qcow2_set_key(BlockDriverState *bs, const char *key) 1205 { 1206 BDRVQcow2State *s = bs->opaque; 1207 uint8_t keybuf[16]; 1208 int len, i; 1209 Error *err = NULL; 1210 1211 memset(keybuf, 0, 16); 1212 len = strlen(key); 1213 if (len > 16) 1214 len = 16; 1215 /* XXX: we could compress the chars to 7 bits to increase 1216 entropy */ 1217 for(i = 0;i < len;i++) { 1218 keybuf[i] = key[i]; 1219 } 1220 assert(bs->encrypted); 1221 1222 qcrypto_cipher_free(s->cipher); 1223 s->cipher = qcrypto_cipher_new( 1224 QCRYPTO_CIPHER_ALG_AES_128, 1225 QCRYPTO_CIPHER_MODE_CBC, 1226 keybuf, G_N_ELEMENTS(keybuf), 1227 &err); 1228 1229 if (!s->cipher) { 1230 /* XXX would be nice if errors in this method could 1231 * be properly propagate to the caller. Would need 1232 * the bdrv_set_key() API signature to be fixed. */ 1233 error_free(err); 1234 return -1; 1235 } 1236 return 0; 1237 } 1238 1239 static int qcow2_reopen_prepare(BDRVReopenState *state, 1240 BlockReopenQueue *queue, Error **errp) 1241 { 1242 Qcow2ReopenState *r; 1243 int ret; 1244 1245 r = g_new0(Qcow2ReopenState, 1); 1246 state->opaque = r; 1247 1248 ret = qcow2_update_options_prepare(state->bs, r, state->options, 1249 state->flags, errp); 1250 if (ret < 0) { 1251 goto fail; 1252 } 1253 1254 /* We need to write out any unwritten data if we reopen read-only. */ 1255 if ((state->flags & BDRV_O_RDWR) == 0) { 1256 ret = bdrv_flush(state->bs); 1257 if (ret < 0) { 1258 goto fail; 1259 } 1260 1261 ret = qcow2_mark_clean(state->bs); 1262 if (ret < 0) { 1263 goto fail; 1264 } 1265 } 1266 1267 return 0; 1268 1269 fail: 1270 qcow2_update_options_abort(state->bs, r); 1271 g_free(r); 1272 return ret; 1273 } 1274 1275 static void qcow2_reopen_commit(BDRVReopenState *state) 1276 { 1277 qcow2_update_options_commit(state->bs, state->opaque); 1278 g_free(state->opaque); 1279 } 1280 1281 static void qcow2_reopen_abort(BDRVReopenState *state) 1282 { 1283 qcow2_update_options_abort(state->bs, state->opaque); 1284 g_free(state->opaque); 1285 } 1286 1287 static void qcow2_join_options(QDict *options, QDict *old_options) 1288 { 1289 bool has_new_overlap_template = 1290 qdict_haskey(options, QCOW2_OPT_OVERLAP) || 1291 qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE); 1292 bool has_new_total_cache_size = 1293 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE); 1294 bool has_all_cache_options; 1295 1296 /* New overlap template overrides all old overlap options */ 1297 if (has_new_overlap_template) { 1298 qdict_del(old_options, QCOW2_OPT_OVERLAP); 1299 qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE); 1300 qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER); 1301 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1); 1302 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2); 1303 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE); 1304 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK); 1305 qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE); 1306 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1); 1307 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2); 1308 } 1309 1310 /* New total cache size overrides all old options */ 1311 if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) { 1312 qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE); 1313 qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 1314 } 1315 1316 qdict_join(options, old_options, false); 1317 1318 /* 1319 * If after merging all cache size options are set, an old total size is 1320 * overwritten. Do keep all options, however, if all three are new. The 1321 * resulting error message is what we want to happen. 1322 */ 1323 has_all_cache_options = 1324 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) || 1325 qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) || 1326 qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 1327 1328 if (has_all_cache_options && !has_new_total_cache_size) { 1329 qdict_del(options, QCOW2_OPT_CACHE_SIZE); 1330 } 1331 } 1332 1333 static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, 1334 int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) 1335 { 1336 BDRVQcow2State *s = bs->opaque; 1337 uint64_t cluster_offset; 1338 int index_in_cluster, ret; 1339 int64_t status = 0; 1340 1341 *pnum = nb_sectors; 1342 qemu_co_mutex_lock(&s->lock); 1343 ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset); 1344 qemu_co_mutex_unlock(&s->lock); 1345 if (ret < 0) { 1346 return ret; 1347 } 1348 1349 if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED && 1350 !s->cipher) { 1351 index_in_cluster = sector_num & (s->cluster_sectors - 1); 1352 cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); 1353 *file = bs->file->bs; 1354 status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset; 1355 } 1356 if (ret == QCOW2_CLUSTER_ZERO) { 1357 status |= BDRV_BLOCK_ZERO; 1358 } else if (ret != QCOW2_CLUSTER_UNALLOCATED) { 1359 status |= BDRV_BLOCK_DATA; 1360 } 1361 return status; 1362 } 1363 1364 /* handle reading after the end of the backing file */ 1365 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, 1366 int64_t sector_num, int nb_sectors) 1367 { 1368 int n1; 1369 if ((sector_num + nb_sectors) <= bs->total_sectors) 1370 return nb_sectors; 1371 if (sector_num >= bs->total_sectors) 1372 n1 = 0; 1373 else 1374 n1 = bs->total_sectors - sector_num; 1375 1376 qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1)); 1377 1378 return n1; 1379 } 1380 1381 static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, 1382 int remaining_sectors, QEMUIOVector *qiov) 1383 { 1384 BDRVQcow2State *s = bs->opaque; 1385 int index_in_cluster, n1; 1386 int ret; 1387 int cur_nr_sectors; /* number of sectors in current iteration */ 1388 uint64_t cluster_offset = 0; 1389 uint64_t bytes_done = 0; 1390 QEMUIOVector hd_qiov; 1391 uint8_t *cluster_data = NULL; 1392 1393 qemu_iovec_init(&hd_qiov, qiov->niov); 1394 1395 qemu_co_mutex_lock(&s->lock); 1396 1397 while (remaining_sectors != 0) { 1398 1399 /* prepare next request */ 1400 cur_nr_sectors = remaining_sectors; 1401 if (s->cipher) { 1402 cur_nr_sectors = MIN(cur_nr_sectors, 1403 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); 1404 } 1405 1406 ret = qcow2_get_cluster_offset(bs, sector_num << 9, 1407 &cur_nr_sectors, &cluster_offset); 1408 if (ret < 0) { 1409 goto fail; 1410 } 1411 1412 index_in_cluster = sector_num & (s->cluster_sectors - 1); 1413 1414 qemu_iovec_reset(&hd_qiov); 1415 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, 1416 cur_nr_sectors * 512); 1417 1418 switch (ret) { 1419 case QCOW2_CLUSTER_UNALLOCATED: 1420 1421 if (bs->backing) { 1422 /* read from the base image */ 1423 n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov, 1424 sector_num, cur_nr_sectors); 1425 if (n1 > 0) { 1426 QEMUIOVector local_qiov; 1427 1428 qemu_iovec_init(&local_qiov, hd_qiov.niov); 1429 qemu_iovec_concat(&local_qiov, &hd_qiov, 0, 1430 n1 * BDRV_SECTOR_SIZE); 1431 1432 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); 1433 qemu_co_mutex_unlock(&s->lock); 1434 ret = bdrv_co_readv(bs->backing->bs, sector_num, 1435 n1, &local_qiov); 1436 qemu_co_mutex_lock(&s->lock); 1437 1438 qemu_iovec_destroy(&local_qiov); 1439 1440 if (ret < 0) { 1441 goto fail; 1442 } 1443 } 1444 } else { 1445 /* Note: in this case, no need to wait */ 1446 qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); 1447 } 1448 break; 1449 1450 case QCOW2_CLUSTER_ZERO: 1451 qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); 1452 break; 1453 1454 case QCOW2_CLUSTER_COMPRESSED: 1455 /* add AIO support for compressed blocks ? */ 1456 ret = qcow2_decompress_cluster(bs, cluster_offset); 1457 if (ret < 0) { 1458 goto fail; 1459 } 1460 1461 qemu_iovec_from_buf(&hd_qiov, 0, 1462 s->cluster_cache + index_in_cluster * 512, 1463 512 * cur_nr_sectors); 1464 break; 1465 1466 case QCOW2_CLUSTER_NORMAL: 1467 if ((cluster_offset & 511) != 0) { 1468 ret = -EIO; 1469 goto fail; 1470 } 1471 1472 if (bs->encrypted) { 1473 assert(s->cipher); 1474 1475 /* 1476 * For encrypted images, read everything into a temporary 1477 * contiguous buffer on which the AES functions can work. 1478 */ 1479 if (!cluster_data) { 1480 cluster_data = 1481 qemu_try_blockalign(bs->file->bs, 1482 QCOW_MAX_CRYPT_CLUSTERS 1483 * s->cluster_size); 1484 if (cluster_data == NULL) { 1485 ret = -ENOMEM; 1486 goto fail; 1487 } 1488 } 1489 1490 assert(cur_nr_sectors <= 1491 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); 1492 qemu_iovec_reset(&hd_qiov); 1493 qemu_iovec_add(&hd_qiov, cluster_data, 1494 512 * cur_nr_sectors); 1495 } 1496 1497 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); 1498 qemu_co_mutex_unlock(&s->lock); 1499 ret = bdrv_co_readv(bs->file->bs, 1500 (cluster_offset >> 9) + index_in_cluster, 1501 cur_nr_sectors, &hd_qiov); 1502 qemu_co_mutex_lock(&s->lock); 1503 if (ret < 0) { 1504 goto fail; 1505 } 1506 if (bs->encrypted) { 1507 assert(s->cipher); 1508 Error *err = NULL; 1509 if (qcow2_encrypt_sectors(s, sector_num, cluster_data, 1510 cluster_data, cur_nr_sectors, false, 1511 &err) < 0) { 1512 error_free(err); 1513 ret = -EIO; 1514 goto fail; 1515 } 1516 qemu_iovec_from_buf(qiov, bytes_done, 1517 cluster_data, 512 * cur_nr_sectors); 1518 } 1519 break; 1520 1521 default: 1522 g_assert_not_reached(); 1523 ret = -EIO; 1524 goto fail; 1525 } 1526 1527 remaining_sectors -= cur_nr_sectors; 1528 sector_num += cur_nr_sectors; 1529 bytes_done += cur_nr_sectors * 512; 1530 } 1531 ret = 0; 1532 1533 fail: 1534 qemu_co_mutex_unlock(&s->lock); 1535 1536 qemu_iovec_destroy(&hd_qiov); 1537 qemu_vfree(cluster_data); 1538 1539 return ret; 1540 } 1541 1542 static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, 1543 int64_t sector_num, 1544 int remaining_sectors, 1545 QEMUIOVector *qiov) 1546 { 1547 BDRVQcow2State *s = bs->opaque; 1548 int index_in_cluster; 1549 int ret; 1550 int cur_nr_sectors; /* number of sectors in current iteration */ 1551 uint64_t cluster_offset; 1552 QEMUIOVector hd_qiov; 1553 uint64_t bytes_done = 0; 1554 uint8_t *cluster_data = NULL; 1555 QCowL2Meta *l2meta = NULL; 1556 1557 trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, 1558 remaining_sectors); 1559 1560 qemu_iovec_init(&hd_qiov, qiov->niov); 1561 1562 s->cluster_cache_offset = -1; /* disable compressed cache */ 1563 1564 qemu_co_mutex_lock(&s->lock); 1565 1566 while (remaining_sectors != 0) { 1567 1568 l2meta = NULL; 1569 1570 trace_qcow2_writev_start_part(qemu_coroutine_self()); 1571 index_in_cluster = sector_num & (s->cluster_sectors - 1); 1572 cur_nr_sectors = remaining_sectors; 1573 if (bs->encrypted && 1574 cur_nr_sectors > 1575 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) { 1576 cur_nr_sectors = 1577 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster; 1578 } 1579 1580 ret = qcow2_alloc_cluster_offset(bs, sector_num << 9, 1581 &cur_nr_sectors, &cluster_offset, &l2meta); 1582 if (ret < 0) { 1583 goto fail; 1584 } 1585 1586 assert((cluster_offset & 511) == 0); 1587 1588 qemu_iovec_reset(&hd_qiov); 1589 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, 1590 cur_nr_sectors * 512); 1591 1592 if (bs->encrypted) { 1593 Error *err = NULL; 1594 assert(s->cipher); 1595 if (!cluster_data) { 1596 cluster_data = qemu_try_blockalign(bs->file->bs, 1597 QCOW_MAX_CRYPT_CLUSTERS 1598 * s->cluster_size); 1599 if (cluster_data == NULL) { 1600 ret = -ENOMEM; 1601 goto fail; 1602 } 1603 } 1604 1605 assert(hd_qiov.size <= 1606 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 1607 qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); 1608 1609 if (qcow2_encrypt_sectors(s, sector_num, cluster_data, 1610 cluster_data, cur_nr_sectors, 1611 true, &err) < 0) { 1612 error_free(err); 1613 ret = -EIO; 1614 goto fail; 1615 } 1616 1617 qemu_iovec_reset(&hd_qiov); 1618 qemu_iovec_add(&hd_qiov, cluster_data, 1619 cur_nr_sectors * 512); 1620 } 1621 1622 ret = qcow2_pre_write_overlap_check(bs, 0, 1623 cluster_offset + index_in_cluster * BDRV_SECTOR_SIZE, 1624 cur_nr_sectors * BDRV_SECTOR_SIZE); 1625 if (ret < 0) { 1626 goto fail; 1627 } 1628 1629 qemu_co_mutex_unlock(&s->lock); 1630 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); 1631 trace_qcow2_writev_data(qemu_coroutine_self(), 1632 (cluster_offset >> 9) + index_in_cluster); 1633 ret = bdrv_co_writev(bs->file->bs, 1634 (cluster_offset >> 9) + index_in_cluster, 1635 cur_nr_sectors, &hd_qiov); 1636 qemu_co_mutex_lock(&s->lock); 1637 if (ret < 0) { 1638 goto fail; 1639 } 1640 1641 while (l2meta != NULL) { 1642 QCowL2Meta *next; 1643 1644 ret = qcow2_alloc_cluster_link_l2(bs, l2meta); 1645 if (ret < 0) { 1646 goto fail; 1647 } 1648 1649 /* Take the request off the list of running requests */ 1650 if (l2meta->nb_clusters != 0) { 1651 QLIST_REMOVE(l2meta, next_in_flight); 1652 } 1653 1654 qemu_co_queue_restart_all(&l2meta->dependent_requests); 1655 1656 next = l2meta->next; 1657 g_free(l2meta); 1658 l2meta = next; 1659 } 1660 1661 remaining_sectors -= cur_nr_sectors; 1662 sector_num += cur_nr_sectors; 1663 bytes_done += cur_nr_sectors * 512; 1664 trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors); 1665 } 1666 ret = 0; 1667 1668 fail: 1669 qemu_co_mutex_unlock(&s->lock); 1670 1671 while (l2meta != NULL) { 1672 QCowL2Meta *next; 1673 1674 if (l2meta->nb_clusters != 0) { 1675 QLIST_REMOVE(l2meta, next_in_flight); 1676 } 1677 qemu_co_queue_restart_all(&l2meta->dependent_requests); 1678 1679 next = l2meta->next; 1680 g_free(l2meta); 1681 l2meta = next; 1682 } 1683 1684 qemu_iovec_destroy(&hd_qiov); 1685 qemu_vfree(cluster_data); 1686 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); 1687 1688 return ret; 1689 } 1690 1691 static int qcow2_inactivate(BlockDriverState *bs) 1692 { 1693 BDRVQcow2State *s = bs->opaque; 1694 int ret, result = 0; 1695 1696 ret = qcow2_cache_flush(bs, s->l2_table_cache); 1697 if (ret) { 1698 result = ret; 1699 error_report("Failed to flush the L2 table cache: %s", 1700 strerror(-ret)); 1701 } 1702 1703 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 1704 if (ret) { 1705 result = ret; 1706 error_report("Failed to flush the refcount block cache: %s", 1707 strerror(-ret)); 1708 } 1709 1710 if (result == 0) { 1711 qcow2_mark_clean(bs); 1712 } 1713 1714 return result; 1715 } 1716 1717 static void qcow2_close(BlockDriverState *bs) 1718 { 1719 BDRVQcow2State *s = bs->opaque; 1720 qemu_vfree(s->l1_table); 1721 /* else pre-write overlap checks in cache_destroy may crash */ 1722 s->l1_table = NULL; 1723 1724 if (!(s->flags & BDRV_O_INACTIVE)) { 1725 qcow2_inactivate(bs); 1726 } 1727 1728 cache_clean_timer_del(bs); 1729 qcow2_cache_destroy(bs, s->l2_table_cache); 1730 qcow2_cache_destroy(bs, s->refcount_block_cache); 1731 1732 qcrypto_cipher_free(s->cipher); 1733 s->cipher = NULL; 1734 1735 g_free(s->unknown_header_fields); 1736 cleanup_unknown_header_ext(bs); 1737 1738 g_free(s->image_backing_file); 1739 g_free(s->image_backing_format); 1740 1741 g_free(s->cluster_cache); 1742 qemu_vfree(s->cluster_data); 1743 qcow2_refcount_close(bs); 1744 qcow2_free_snapshots(bs); 1745 } 1746 1747 static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) 1748 { 1749 BDRVQcow2State *s = bs->opaque; 1750 int flags = s->flags; 1751 QCryptoCipher *cipher = NULL; 1752 QDict *options; 1753 Error *local_err = NULL; 1754 int ret; 1755 1756 /* 1757 * Backing files are read-only which makes all of their metadata immutable, 1758 * that means we don't have to worry about reopening them here. 1759 */ 1760 1761 cipher = s->cipher; 1762 s->cipher = NULL; 1763 1764 qcow2_close(bs); 1765 1766 bdrv_invalidate_cache(bs->file->bs, &local_err); 1767 if (local_err) { 1768 error_propagate(errp, local_err); 1769 bs->drv = NULL; 1770 return; 1771 } 1772 1773 memset(s, 0, sizeof(BDRVQcow2State)); 1774 options = qdict_clone_shallow(bs->options); 1775 1776 flags &= ~BDRV_O_INACTIVE; 1777 ret = qcow2_open(bs, options, flags, &local_err); 1778 QDECREF(options); 1779 if (local_err) { 1780 error_propagate(errp, local_err); 1781 error_prepend(errp, "Could not reopen qcow2 layer: "); 1782 bs->drv = NULL; 1783 return; 1784 } else if (ret < 0) { 1785 error_setg_errno(errp, -ret, "Could not reopen qcow2 layer"); 1786 bs->drv = NULL; 1787 return; 1788 } 1789 1790 s->cipher = cipher; 1791 } 1792 1793 static size_t header_ext_add(char *buf, uint32_t magic, const void *s, 1794 size_t len, size_t buflen) 1795 { 1796 QCowExtension *ext_backing_fmt = (QCowExtension*) buf; 1797 size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7); 1798 1799 if (buflen < ext_len) { 1800 return -ENOSPC; 1801 } 1802 1803 *ext_backing_fmt = (QCowExtension) { 1804 .magic = cpu_to_be32(magic), 1805 .len = cpu_to_be32(len), 1806 }; 1807 memcpy(buf + sizeof(QCowExtension), s, len); 1808 1809 return ext_len; 1810 } 1811 1812 /* 1813 * Updates the qcow2 header, including the variable length parts of it, i.e. 1814 * the backing file name and all extensions. qcow2 was not designed to allow 1815 * such changes, so if we run out of space (we can only use the first cluster) 1816 * this function may fail. 1817 * 1818 * Returns 0 on success, -errno in error cases. 1819 */ 1820 int qcow2_update_header(BlockDriverState *bs) 1821 { 1822 BDRVQcow2State *s = bs->opaque; 1823 QCowHeader *header; 1824 char *buf; 1825 size_t buflen = s->cluster_size; 1826 int ret; 1827 uint64_t total_size; 1828 uint32_t refcount_table_clusters; 1829 size_t header_length; 1830 Qcow2UnknownHeaderExtension *uext; 1831 1832 buf = qemu_blockalign(bs, buflen); 1833 1834 /* Header structure */ 1835 header = (QCowHeader*) buf; 1836 1837 if (buflen < sizeof(*header)) { 1838 ret = -ENOSPC; 1839 goto fail; 1840 } 1841 1842 header_length = sizeof(*header) + s->unknown_header_fields_size; 1843 total_size = bs->total_sectors * BDRV_SECTOR_SIZE; 1844 refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); 1845 1846 *header = (QCowHeader) { 1847 /* Version 2 fields */ 1848 .magic = cpu_to_be32(QCOW_MAGIC), 1849 .version = cpu_to_be32(s->qcow_version), 1850 .backing_file_offset = 0, 1851 .backing_file_size = 0, 1852 .cluster_bits = cpu_to_be32(s->cluster_bits), 1853 .size = cpu_to_be64(total_size), 1854 .crypt_method = cpu_to_be32(s->crypt_method_header), 1855 .l1_size = cpu_to_be32(s->l1_size), 1856 .l1_table_offset = cpu_to_be64(s->l1_table_offset), 1857 .refcount_table_offset = cpu_to_be64(s->refcount_table_offset), 1858 .refcount_table_clusters = cpu_to_be32(refcount_table_clusters), 1859 .nb_snapshots = cpu_to_be32(s->nb_snapshots), 1860 .snapshots_offset = cpu_to_be64(s->snapshots_offset), 1861 1862 /* Version 3 fields */ 1863 .incompatible_features = cpu_to_be64(s->incompatible_features), 1864 .compatible_features = cpu_to_be64(s->compatible_features), 1865 .autoclear_features = cpu_to_be64(s->autoclear_features), 1866 .refcount_order = cpu_to_be32(s->refcount_order), 1867 .header_length = cpu_to_be32(header_length), 1868 }; 1869 1870 /* For older versions, write a shorter header */ 1871 switch (s->qcow_version) { 1872 case 2: 1873 ret = offsetof(QCowHeader, incompatible_features); 1874 break; 1875 case 3: 1876 ret = sizeof(*header); 1877 break; 1878 default: 1879 ret = -EINVAL; 1880 goto fail; 1881 } 1882 1883 buf += ret; 1884 buflen -= ret; 1885 memset(buf, 0, buflen); 1886 1887 /* Preserve any unknown field in the header */ 1888 if (s->unknown_header_fields_size) { 1889 if (buflen < s->unknown_header_fields_size) { 1890 ret = -ENOSPC; 1891 goto fail; 1892 } 1893 1894 memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size); 1895 buf += s->unknown_header_fields_size; 1896 buflen -= s->unknown_header_fields_size; 1897 } 1898 1899 /* Backing file format header extension */ 1900 if (s->image_backing_format) { 1901 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, 1902 s->image_backing_format, 1903 strlen(s->image_backing_format), 1904 buflen); 1905 if (ret < 0) { 1906 goto fail; 1907 } 1908 1909 buf += ret; 1910 buflen -= ret; 1911 } 1912 1913 /* Feature table */ 1914 if (s->qcow_version >= 3) { 1915 Qcow2Feature features[] = { 1916 { 1917 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 1918 .bit = QCOW2_INCOMPAT_DIRTY_BITNR, 1919 .name = "dirty bit", 1920 }, 1921 { 1922 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 1923 .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, 1924 .name = "corrupt bit", 1925 }, 1926 { 1927 .type = QCOW2_FEAT_TYPE_COMPATIBLE, 1928 .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, 1929 .name = "lazy refcounts", 1930 }, 1931 }; 1932 1933 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, 1934 features, sizeof(features), buflen); 1935 if (ret < 0) { 1936 goto fail; 1937 } 1938 buf += ret; 1939 buflen -= ret; 1940 } 1941 1942 /* Keep unknown header extensions */ 1943 QLIST_FOREACH(uext, &s->unknown_header_ext, next) { 1944 ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); 1945 if (ret < 0) { 1946 goto fail; 1947 } 1948 1949 buf += ret; 1950 buflen -= ret; 1951 } 1952 1953 /* End of header extensions */ 1954 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen); 1955 if (ret < 0) { 1956 goto fail; 1957 } 1958 1959 buf += ret; 1960 buflen -= ret; 1961 1962 /* Backing file name */ 1963 if (s->image_backing_file) { 1964 size_t backing_file_len = strlen(s->image_backing_file); 1965 1966 if (buflen < backing_file_len) { 1967 ret = -ENOSPC; 1968 goto fail; 1969 } 1970 1971 /* Using strncpy is ok here, since buf is not NUL-terminated. */ 1972 strncpy(buf, s->image_backing_file, buflen); 1973 1974 header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); 1975 header->backing_file_size = cpu_to_be32(backing_file_len); 1976 } 1977 1978 /* Write the new header */ 1979 ret = bdrv_pwrite(bs->file->bs, 0, header, s->cluster_size); 1980 if (ret < 0) { 1981 goto fail; 1982 } 1983 1984 ret = 0; 1985 fail: 1986 qemu_vfree(header); 1987 return ret; 1988 } 1989 1990 static int qcow2_change_backing_file(BlockDriverState *bs, 1991 const char *backing_file, const char *backing_fmt) 1992 { 1993 BDRVQcow2State *s = bs->opaque; 1994 1995 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 1996 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 1997 1998 g_free(s->image_backing_file); 1999 g_free(s->image_backing_format); 2000 2001 s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL; 2002 s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL; 2003 2004 return qcow2_update_header(bs); 2005 } 2006 2007 static int preallocate(BlockDriverState *bs) 2008 { 2009 uint64_t nb_sectors; 2010 uint64_t offset; 2011 uint64_t host_offset = 0; 2012 int num; 2013 int ret; 2014 QCowL2Meta *meta; 2015 2016 nb_sectors = bdrv_nb_sectors(bs); 2017 offset = 0; 2018 2019 while (nb_sectors) { 2020 num = MIN(nb_sectors, INT_MAX >> BDRV_SECTOR_BITS); 2021 ret = qcow2_alloc_cluster_offset(bs, offset, &num, 2022 &host_offset, &meta); 2023 if (ret < 0) { 2024 return ret; 2025 } 2026 2027 while (meta) { 2028 QCowL2Meta *next = meta->next; 2029 2030 ret = qcow2_alloc_cluster_link_l2(bs, meta); 2031 if (ret < 0) { 2032 qcow2_free_any_clusters(bs, meta->alloc_offset, 2033 meta->nb_clusters, QCOW2_DISCARD_NEVER); 2034 return ret; 2035 } 2036 2037 /* There are no dependent requests, but we need to remove our 2038 * request from the list of in-flight requests */ 2039 QLIST_REMOVE(meta, next_in_flight); 2040 2041 g_free(meta); 2042 meta = next; 2043 } 2044 2045 /* TODO Preallocate data if requested */ 2046 2047 nb_sectors -= num; 2048 offset += num << BDRV_SECTOR_BITS; 2049 } 2050 2051 /* 2052 * It is expected that the image file is large enough to actually contain 2053 * all of the allocated clusters (otherwise we get failing reads after 2054 * EOF). Extend the image to the last allocated sector. 2055 */ 2056 if (host_offset != 0) { 2057 uint8_t buf[BDRV_SECTOR_SIZE]; 2058 memset(buf, 0, BDRV_SECTOR_SIZE); 2059 ret = bdrv_write(bs->file->bs, 2060 (host_offset >> BDRV_SECTOR_BITS) + num - 1, 2061 buf, 1); 2062 if (ret < 0) { 2063 return ret; 2064 } 2065 } 2066 2067 return 0; 2068 } 2069 2070 static int qcow2_create2(const char *filename, int64_t total_size, 2071 const char *backing_file, const char *backing_format, 2072 int flags, size_t cluster_size, PreallocMode prealloc, 2073 QemuOpts *opts, int version, int refcount_order, 2074 Error **errp) 2075 { 2076 int cluster_bits; 2077 QDict *options; 2078 2079 /* Calculate cluster_bits */ 2080 cluster_bits = ctz32(cluster_size); 2081 if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || 2082 (1 << cluster_bits) != cluster_size) 2083 { 2084 error_setg(errp, "Cluster size must be a power of two between %d and " 2085 "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); 2086 return -EINVAL; 2087 } 2088 2089 /* 2090 * Open the image file and write a minimal qcow2 header. 2091 * 2092 * We keep things simple and start with a zero-sized image. We also 2093 * do without refcount blocks or a L1 table for now. We'll fix the 2094 * inconsistency later. 2095 * 2096 * We do need a refcount table because growing the refcount table means 2097 * allocating two new refcount blocks - the seconds of which would be at 2098 * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file 2099 * size for any qcow2 image. 2100 */ 2101 BlockBackend *blk; 2102 QCowHeader *header; 2103 uint64_t* refcount_table; 2104 Error *local_err = NULL; 2105 int ret; 2106 2107 if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { 2108 /* Note: The following calculation does not need to be exact; if it is a 2109 * bit off, either some bytes will be "leaked" (which is fine) or we 2110 * will need to increase the file size by some bytes (which is fine, 2111 * too, as long as the bulk is allocated here). Therefore, using 2112 * floating point arithmetic is fine. */ 2113 int64_t meta_size = 0; 2114 uint64_t nreftablee, nrefblocke, nl1e, nl2e; 2115 int64_t aligned_total_size = align_offset(total_size, cluster_size); 2116 int refblock_bits, refblock_size; 2117 /* refcount entry size in bytes */ 2118 double rces = (1 << refcount_order) / 8.; 2119 2120 /* see qcow2_open() */ 2121 refblock_bits = cluster_bits - (refcount_order - 3); 2122 refblock_size = 1 << refblock_bits; 2123 2124 /* header: 1 cluster */ 2125 meta_size += cluster_size; 2126 2127 /* total size of L2 tables */ 2128 nl2e = aligned_total_size / cluster_size; 2129 nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t)); 2130 meta_size += nl2e * sizeof(uint64_t); 2131 2132 /* total size of L1 tables */ 2133 nl1e = nl2e * sizeof(uint64_t) / cluster_size; 2134 nl1e = align_offset(nl1e, cluster_size / sizeof(uint64_t)); 2135 meta_size += nl1e * sizeof(uint64_t); 2136 2137 /* total size of refcount blocks 2138 * 2139 * note: every host cluster is reference-counted, including metadata 2140 * (even refcount blocks are recursively included). 2141 * Let: 2142 * a = total_size (this is the guest disk size) 2143 * m = meta size not including refcount blocks and refcount tables 2144 * c = cluster size 2145 * y1 = number of refcount blocks entries 2146 * y2 = meta size including everything 2147 * rces = refcount entry size in bytes 2148 * then, 2149 * y1 = (y2 + a)/c 2150 * y2 = y1 * rces + y1 * rces * sizeof(u64) / c + m 2151 * we can get y1: 2152 * y1 = (a + m) / (c - rces - rces * sizeof(u64) / c) 2153 */ 2154 nrefblocke = (aligned_total_size + meta_size + cluster_size) 2155 / (cluster_size - rces - rces * sizeof(uint64_t) 2156 / cluster_size); 2157 meta_size += DIV_ROUND_UP(nrefblocke, refblock_size) * cluster_size; 2158 2159 /* total size of refcount tables */ 2160 nreftablee = nrefblocke / refblock_size; 2161 nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t)); 2162 meta_size += nreftablee * sizeof(uint64_t); 2163 2164 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 2165 aligned_total_size + meta_size, &error_abort); 2166 qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_lookup[prealloc], 2167 &error_abort); 2168 } 2169 2170 ret = bdrv_create_file(filename, opts, &local_err); 2171 if (ret < 0) { 2172 error_propagate(errp, local_err); 2173 return ret; 2174 } 2175 2176 blk = blk_new_open("image", filename, NULL, NULL, 2177 BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, 2178 &local_err); 2179 if (blk == NULL) { 2180 error_propagate(errp, local_err); 2181 return -EIO; 2182 } 2183 2184 blk_set_allow_write_beyond_eof(blk, true); 2185 2186 /* Write the header */ 2187 QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); 2188 header = g_malloc0(cluster_size); 2189 *header = (QCowHeader) { 2190 .magic = cpu_to_be32(QCOW_MAGIC), 2191 .version = cpu_to_be32(version), 2192 .cluster_bits = cpu_to_be32(cluster_bits), 2193 .size = cpu_to_be64(0), 2194 .l1_table_offset = cpu_to_be64(0), 2195 .l1_size = cpu_to_be32(0), 2196 .refcount_table_offset = cpu_to_be64(cluster_size), 2197 .refcount_table_clusters = cpu_to_be32(1), 2198 .refcount_order = cpu_to_be32(refcount_order), 2199 .header_length = cpu_to_be32(sizeof(*header)), 2200 }; 2201 2202 if (flags & BLOCK_FLAG_ENCRYPT) { 2203 header->crypt_method = cpu_to_be32(QCOW_CRYPT_AES); 2204 } else { 2205 header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 2206 } 2207 2208 if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) { 2209 header->compatible_features |= 2210 cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); 2211 } 2212 2213 ret = blk_pwrite(blk, 0, header, cluster_size); 2214 g_free(header); 2215 if (ret < 0) { 2216 error_setg_errno(errp, -ret, "Could not write qcow2 header"); 2217 goto out; 2218 } 2219 2220 /* Write a refcount table with one refcount block */ 2221 refcount_table = g_malloc0(2 * cluster_size); 2222 refcount_table[0] = cpu_to_be64(2 * cluster_size); 2223 ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size); 2224 g_free(refcount_table); 2225 2226 if (ret < 0) { 2227 error_setg_errno(errp, -ret, "Could not write refcount table"); 2228 goto out; 2229 } 2230 2231 blk_unref(blk); 2232 blk = NULL; 2233 2234 /* 2235 * And now open the image and make it consistent first (i.e. increase the 2236 * refcount of the cluster that is occupied by the header and the refcount 2237 * table) 2238 */ 2239 options = qdict_new(); 2240 qdict_put(options, "driver", qstring_from_str("qcow2")); 2241 blk = blk_new_open("image-qcow2", filename, NULL, options, 2242 BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, 2243 &local_err); 2244 if (blk == NULL) { 2245 error_propagate(errp, local_err); 2246 ret = -EIO; 2247 goto out; 2248 } 2249 2250 ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); 2251 if (ret < 0) { 2252 error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " 2253 "header and refcount table"); 2254 goto out; 2255 2256 } else if (ret != 0) { 2257 error_report("Huh, first cluster in empty image is already in use?"); 2258 abort(); 2259 } 2260 2261 /* Create a full header (including things like feature table) */ 2262 ret = qcow2_update_header(blk_bs(blk)); 2263 if (ret < 0) { 2264 error_setg_errno(errp, -ret, "Could not update qcow2 header"); 2265 goto out; 2266 } 2267 2268 /* Okay, now that we have a valid image, let's give it the right size */ 2269 ret = blk_truncate(blk, total_size); 2270 if (ret < 0) { 2271 error_setg_errno(errp, -ret, "Could not resize image"); 2272 goto out; 2273 } 2274 2275 /* Want a backing file? There you go.*/ 2276 if (backing_file) { 2277 ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format); 2278 if (ret < 0) { 2279 error_setg_errno(errp, -ret, "Could not assign backing file '%s' " 2280 "with format '%s'", backing_file, backing_format); 2281 goto out; 2282 } 2283 } 2284 2285 /* And if we're supposed to preallocate metadata, do that now */ 2286 if (prealloc != PREALLOC_MODE_OFF) { 2287 BDRVQcow2State *s = blk_bs(blk)->opaque; 2288 qemu_co_mutex_lock(&s->lock); 2289 ret = preallocate(blk_bs(blk)); 2290 qemu_co_mutex_unlock(&s->lock); 2291 if (ret < 0) { 2292 error_setg_errno(errp, -ret, "Could not preallocate metadata"); 2293 goto out; 2294 } 2295 } 2296 2297 blk_unref(blk); 2298 blk = NULL; 2299 2300 /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ 2301 options = qdict_new(); 2302 qdict_put(options, "driver", qstring_from_str("qcow2")); 2303 blk = blk_new_open("image-flush", filename, NULL, options, 2304 BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING, 2305 &local_err); 2306 if (blk == NULL) { 2307 error_propagate(errp, local_err); 2308 ret = -EIO; 2309 goto out; 2310 } 2311 2312 ret = 0; 2313 out: 2314 if (blk) { 2315 blk_unref(blk); 2316 } 2317 return ret; 2318 } 2319 2320 static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) 2321 { 2322 char *backing_file = NULL; 2323 char *backing_fmt = NULL; 2324 char *buf = NULL; 2325 uint64_t size = 0; 2326 int flags = 0; 2327 size_t cluster_size = DEFAULT_CLUSTER_SIZE; 2328 PreallocMode prealloc; 2329 int version = 3; 2330 uint64_t refcount_bits = 16; 2331 int refcount_order; 2332 Error *local_err = NULL; 2333 int ret; 2334 2335 /* Read out options */ 2336 size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), 2337 BDRV_SECTOR_SIZE); 2338 backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); 2339 backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT); 2340 if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) { 2341 flags |= BLOCK_FLAG_ENCRYPT; 2342 } 2343 cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 2344 DEFAULT_CLUSTER_SIZE); 2345 buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 2346 prealloc = qapi_enum_parse(PreallocMode_lookup, buf, 2347 PREALLOC_MODE__MAX, PREALLOC_MODE_OFF, 2348 &local_err); 2349 if (local_err) { 2350 error_propagate(errp, local_err); 2351 ret = -EINVAL; 2352 goto finish; 2353 } 2354 g_free(buf); 2355 buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL); 2356 if (!buf) { 2357 /* keep the default */ 2358 } else if (!strcmp(buf, "0.10")) { 2359 version = 2; 2360 } else if (!strcmp(buf, "1.1")) { 2361 version = 3; 2362 } else { 2363 error_setg(errp, "Invalid compatibility level: '%s'", buf); 2364 ret = -EINVAL; 2365 goto finish; 2366 } 2367 2368 if (qemu_opt_get_bool_del(opts, BLOCK_OPT_LAZY_REFCOUNTS, false)) { 2369 flags |= BLOCK_FLAG_LAZY_REFCOUNTS; 2370 } 2371 2372 if (backing_file && prealloc != PREALLOC_MODE_OFF) { 2373 error_setg(errp, "Backing file and preallocation cannot be used at " 2374 "the same time"); 2375 ret = -EINVAL; 2376 goto finish; 2377 } 2378 2379 if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) { 2380 error_setg(errp, "Lazy refcounts only supported with compatibility " 2381 "level 1.1 and above (use compat=1.1 or greater)"); 2382 ret = -EINVAL; 2383 goto finish; 2384 } 2385 2386 refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 2387 refcount_bits); 2388 if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) { 2389 error_setg(errp, "Refcount width must be a power of two and may not " 2390 "exceed 64 bits"); 2391 ret = -EINVAL; 2392 goto finish; 2393 } 2394 2395 if (version < 3 && refcount_bits != 16) { 2396 error_setg(errp, "Different refcount widths than 16 bits require " 2397 "compatibility level 1.1 or above (use compat=1.1 or " 2398 "greater)"); 2399 ret = -EINVAL; 2400 goto finish; 2401 } 2402 2403 refcount_order = ctz32(refcount_bits); 2404 2405 ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags, 2406 cluster_size, prealloc, opts, version, refcount_order, 2407 &local_err); 2408 if (local_err) { 2409 error_propagate(errp, local_err); 2410 } 2411 2412 finish: 2413 g_free(backing_file); 2414 g_free(backing_fmt); 2415 g_free(buf); 2416 return ret; 2417 } 2418 2419 static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs, 2420 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) 2421 { 2422 int ret; 2423 BDRVQcow2State *s = bs->opaque; 2424 2425 /* Emulate misaligned zero writes */ 2426 if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) { 2427 return -ENOTSUP; 2428 } 2429 2430 /* Whatever is left can use real zero clusters */ 2431 qemu_co_mutex_lock(&s->lock); 2432 ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS, 2433 nb_sectors); 2434 qemu_co_mutex_unlock(&s->lock); 2435 2436 return ret; 2437 } 2438 2439 static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, 2440 int64_t sector_num, int nb_sectors) 2441 { 2442 int ret; 2443 BDRVQcow2State *s = bs->opaque; 2444 2445 qemu_co_mutex_lock(&s->lock); 2446 ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, 2447 nb_sectors, QCOW2_DISCARD_REQUEST, false); 2448 qemu_co_mutex_unlock(&s->lock); 2449 return ret; 2450 } 2451 2452 static int qcow2_truncate(BlockDriverState *bs, int64_t offset) 2453 { 2454 BDRVQcow2State *s = bs->opaque; 2455 int64_t new_l1_size; 2456 int ret; 2457 2458 if (offset & 511) { 2459 error_report("The new size must be a multiple of 512"); 2460 return -EINVAL; 2461 } 2462 2463 /* cannot proceed if image has snapshots */ 2464 if (s->nb_snapshots) { 2465 error_report("Can't resize an image which has snapshots"); 2466 return -ENOTSUP; 2467 } 2468 2469 /* shrinking is currently not supported */ 2470 if (offset < bs->total_sectors * 512) { 2471 error_report("qcow2 doesn't support shrinking images yet"); 2472 return -ENOTSUP; 2473 } 2474 2475 new_l1_size = size_to_l1(s, offset); 2476 ret = qcow2_grow_l1_table(bs, new_l1_size, true); 2477 if (ret < 0) { 2478 return ret; 2479 } 2480 2481 /* write updated header.size */ 2482 offset = cpu_to_be64(offset); 2483 ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, size), 2484 &offset, sizeof(uint64_t)); 2485 if (ret < 0) { 2486 return ret; 2487 } 2488 2489 s->l1_vm_state_index = new_l1_size; 2490 return 0; 2491 } 2492 2493 /* XXX: put compressed sectors first, then all the cluster aligned 2494 tables to avoid losing bytes in alignment */ 2495 static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, 2496 const uint8_t *buf, int nb_sectors) 2497 { 2498 BDRVQcow2State *s = bs->opaque; 2499 z_stream strm; 2500 int ret, out_len; 2501 uint8_t *out_buf; 2502 uint64_t cluster_offset; 2503 2504 if (nb_sectors == 0) { 2505 /* align end of file to a sector boundary to ease reading with 2506 sector based I/Os */ 2507 cluster_offset = bdrv_getlength(bs->file->bs); 2508 return bdrv_truncate(bs->file->bs, cluster_offset); 2509 } 2510 2511 if (nb_sectors != s->cluster_sectors) { 2512 ret = -EINVAL; 2513 2514 /* Zero-pad last write if image size is not cluster aligned */ 2515 if (sector_num + nb_sectors == bs->total_sectors && 2516 nb_sectors < s->cluster_sectors) { 2517 uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size); 2518 memset(pad_buf, 0, s->cluster_size); 2519 memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE); 2520 ret = qcow2_write_compressed(bs, sector_num, 2521 pad_buf, s->cluster_sectors); 2522 qemu_vfree(pad_buf); 2523 } 2524 return ret; 2525 } 2526 2527 out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); 2528 2529 /* best compression, small window, no zlib header */ 2530 memset(&strm, 0, sizeof(strm)); 2531 ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, 2532 Z_DEFLATED, -12, 2533 9, Z_DEFAULT_STRATEGY); 2534 if (ret != 0) { 2535 ret = -EINVAL; 2536 goto fail; 2537 } 2538 2539 strm.avail_in = s->cluster_size; 2540 strm.next_in = (uint8_t *)buf; 2541 strm.avail_out = s->cluster_size; 2542 strm.next_out = out_buf; 2543 2544 ret = deflate(&strm, Z_FINISH); 2545 if (ret != Z_STREAM_END && ret != Z_OK) { 2546 deflateEnd(&strm); 2547 ret = -EINVAL; 2548 goto fail; 2549 } 2550 out_len = strm.next_out - out_buf; 2551 2552 deflateEnd(&strm); 2553 2554 if (ret != Z_STREAM_END || out_len >= s->cluster_size) { 2555 /* could not compress: write normal cluster */ 2556 ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors); 2557 if (ret < 0) { 2558 goto fail; 2559 } 2560 } else { 2561 cluster_offset = qcow2_alloc_compressed_cluster_offset(bs, 2562 sector_num << 9, out_len); 2563 if (!cluster_offset) { 2564 ret = -EIO; 2565 goto fail; 2566 } 2567 cluster_offset &= s->cluster_offset_mask; 2568 2569 ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len); 2570 if (ret < 0) { 2571 goto fail; 2572 } 2573 2574 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); 2575 ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len); 2576 if (ret < 0) { 2577 goto fail; 2578 } 2579 } 2580 2581 ret = 0; 2582 fail: 2583 g_free(out_buf); 2584 return ret; 2585 } 2586 2587 static int make_completely_empty(BlockDriverState *bs) 2588 { 2589 BDRVQcow2State *s = bs->opaque; 2590 int ret, l1_clusters; 2591 int64_t offset; 2592 uint64_t *new_reftable = NULL; 2593 uint64_t rt_entry, l1_size2; 2594 struct { 2595 uint64_t l1_offset; 2596 uint64_t reftable_offset; 2597 uint32_t reftable_clusters; 2598 } QEMU_PACKED l1_ofs_rt_ofs_cls; 2599 2600 ret = qcow2_cache_empty(bs, s->l2_table_cache); 2601 if (ret < 0) { 2602 goto fail; 2603 } 2604 2605 ret = qcow2_cache_empty(bs, s->refcount_block_cache); 2606 if (ret < 0) { 2607 goto fail; 2608 } 2609 2610 /* Refcounts will be broken utterly */ 2611 ret = qcow2_mark_dirty(bs); 2612 if (ret < 0) { 2613 goto fail; 2614 } 2615 2616 BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 2617 2618 l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); 2619 l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t); 2620 2621 /* After this call, neither the in-memory nor the on-disk refcount 2622 * information accurately describe the actual references */ 2623 2624 ret = bdrv_write_zeroes(bs->file->bs, s->l1_table_offset / BDRV_SECTOR_SIZE, 2625 l1_clusters * s->cluster_sectors, 0); 2626 if (ret < 0) { 2627 goto fail_broken_refcounts; 2628 } 2629 memset(s->l1_table, 0, l1_size2); 2630 2631 BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE); 2632 2633 /* Overwrite enough clusters at the beginning of the sectors to place 2634 * the refcount table, a refcount block and the L1 table in; this may 2635 * overwrite parts of the existing refcount and L1 table, which is not 2636 * an issue because the dirty flag is set, complete data loss is in fact 2637 * desired and partial data loss is consequently fine as well */ 2638 ret = bdrv_write_zeroes(bs->file->bs, s->cluster_size / BDRV_SECTOR_SIZE, 2639 (2 + l1_clusters) * s->cluster_size / 2640 BDRV_SECTOR_SIZE, 0); 2641 /* This call (even if it failed overall) may have overwritten on-disk 2642 * refcount structures; in that case, the in-memory refcount information 2643 * will probably differ from the on-disk information which makes the BDS 2644 * unusable */ 2645 if (ret < 0) { 2646 goto fail_broken_refcounts; 2647 } 2648 2649 BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 2650 BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE); 2651 2652 /* "Create" an empty reftable (one cluster) directly after the image 2653 * header and an empty L1 table three clusters after the image header; 2654 * the cluster between those two will be used as the first refblock */ 2655 cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size); 2656 cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size); 2657 cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1); 2658 ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_table_offset), 2659 &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls)); 2660 if (ret < 0) { 2661 goto fail_broken_refcounts; 2662 } 2663 2664 s->l1_table_offset = 3 * s->cluster_size; 2665 2666 new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t)); 2667 if (!new_reftable) { 2668 ret = -ENOMEM; 2669 goto fail_broken_refcounts; 2670 } 2671 2672 s->refcount_table_offset = s->cluster_size; 2673 s->refcount_table_size = s->cluster_size / sizeof(uint64_t); 2674 2675 g_free(s->refcount_table); 2676 s->refcount_table = new_reftable; 2677 new_reftable = NULL; 2678 2679 /* Now the in-memory refcount information again corresponds to the on-disk 2680 * information (reftable is empty and no refblocks (the refblock cache is 2681 * empty)); however, this means some clusters (e.g. the image header) are 2682 * referenced, but not refcounted, but the normal qcow2 code assumes that 2683 * the in-memory information is always correct */ 2684 2685 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); 2686 2687 /* Enter the first refblock into the reftable */ 2688 rt_entry = cpu_to_be64(2 * s->cluster_size); 2689 ret = bdrv_pwrite_sync(bs->file->bs, s->cluster_size, 2690 &rt_entry, sizeof(rt_entry)); 2691 if (ret < 0) { 2692 goto fail_broken_refcounts; 2693 } 2694 s->refcount_table[0] = 2 * s->cluster_size; 2695 2696 s->free_cluster_index = 0; 2697 assert(3 + l1_clusters <= s->refcount_block_size); 2698 offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2); 2699 if (offset < 0) { 2700 ret = offset; 2701 goto fail_broken_refcounts; 2702 } else if (offset > 0) { 2703 error_report("First cluster in emptied image is in use"); 2704 abort(); 2705 } 2706 2707 /* Now finally the in-memory information corresponds to the on-disk 2708 * structures and is correct */ 2709 ret = qcow2_mark_clean(bs); 2710 if (ret < 0) { 2711 goto fail; 2712 } 2713 2714 ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size); 2715 if (ret < 0) { 2716 goto fail; 2717 } 2718 2719 return 0; 2720 2721 fail_broken_refcounts: 2722 /* The BDS is unusable at this point. If we wanted to make it usable, we 2723 * would have to call qcow2_refcount_close(), qcow2_refcount_init(), 2724 * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init() 2725 * again. However, because the functions which could have caused this error 2726 * path to be taken are used by those functions as well, it's very likely 2727 * that that sequence will fail as well. Therefore, just eject the BDS. */ 2728 bs->drv = NULL; 2729 2730 fail: 2731 g_free(new_reftable); 2732 return ret; 2733 } 2734 2735 static int qcow2_make_empty(BlockDriverState *bs) 2736 { 2737 BDRVQcow2State *s = bs->opaque; 2738 uint64_t start_sector; 2739 int sector_step = INT_MAX / BDRV_SECTOR_SIZE; 2740 int l1_clusters, ret = 0; 2741 2742 l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); 2743 2744 if (s->qcow_version >= 3 && !s->snapshots && 2745 3 + l1_clusters <= s->refcount_block_size) { 2746 /* The following function only works for qcow2 v3 images (it requires 2747 * the dirty flag) and only as long as there are no snapshots (because 2748 * it completely empties the image). Furthermore, the L1 table and three 2749 * additional clusters (image header, refcount table, one refcount 2750 * block) have to fit inside one refcount block. */ 2751 return make_completely_empty(bs); 2752 } 2753 2754 /* This fallback code simply discards every active cluster; this is slow, 2755 * but works in all cases */ 2756 for (start_sector = 0; start_sector < bs->total_sectors; 2757 start_sector += sector_step) 2758 { 2759 /* As this function is generally used after committing an external 2760 * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the 2761 * default action for this kind of discard is to pass the discard, 2762 * which will ideally result in an actually smaller image file, as 2763 * is probably desired. */ 2764 ret = qcow2_discard_clusters(bs, start_sector * BDRV_SECTOR_SIZE, 2765 MIN(sector_step, 2766 bs->total_sectors - start_sector), 2767 QCOW2_DISCARD_SNAPSHOT, true); 2768 if (ret < 0) { 2769 break; 2770 } 2771 } 2772 2773 return ret; 2774 } 2775 2776 static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) 2777 { 2778 BDRVQcow2State *s = bs->opaque; 2779 int ret; 2780 2781 qemu_co_mutex_lock(&s->lock); 2782 ret = qcow2_cache_flush(bs, s->l2_table_cache); 2783 if (ret < 0) { 2784 qemu_co_mutex_unlock(&s->lock); 2785 return ret; 2786 } 2787 2788 if (qcow2_need_accurate_refcounts(s)) { 2789 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 2790 if (ret < 0) { 2791 qemu_co_mutex_unlock(&s->lock); 2792 return ret; 2793 } 2794 } 2795 qemu_co_mutex_unlock(&s->lock); 2796 2797 return 0; 2798 } 2799 2800 static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 2801 { 2802 BDRVQcow2State *s = bs->opaque; 2803 bdi->unallocated_blocks_are_zero = true; 2804 bdi->can_write_zeroes_with_unmap = (s->qcow_version >= 3); 2805 bdi->cluster_size = s->cluster_size; 2806 bdi->vm_state_offset = qcow2_vm_state_offset(s); 2807 return 0; 2808 } 2809 2810 static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) 2811 { 2812 BDRVQcow2State *s = bs->opaque; 2813 ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1); 2814 2815 *spec_info = (ImageInfoSpecific){ 2816 .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, 2817 .u.qcow2 = g_new(ImageInfoSpecificQCow2, 1), 2818 }; 2819 if (s->qcow_version == 2) { 2820 *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){ 2821 .compat = g_strdup("0.10"), 2822 .refcount_bits = s->refcount_bits, 2823 }; 2824 } else if (s->qcow_version == 3) { 2825 *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){ 2826 .compat = g_strdup("1.1"), 2827 .lazy_refcounts = s->compatible_features & 2828 QCOW2_COMPAT_LAZY_REFCOUNTS, 2829 .has_lazy_refcounts = true, 2830 .corrupt = s->incompatible_features & 2831 QCOW2_INCOMPAT_CORRUPT, 2832 .has_corrupt = true, 2833 .refcount_bits = s->refcount_bits, 2834 }; 2835 } else { 2836 /* if this assertion fails, this probably means a new version was 2837 * added without having it covered here */ 2838 assert(false); 2839 } 2840 2841 return spec_info; 2842 } 2843 2844 #if 0 2845 static void dump_refcounts(BlockDriverState *bs) 2846 { 2847 BDRVQcow2State *s = bs->opaque; 2848 int64_t nb_clusters, k, k1, size; 2849 int refcount; 2850 2851 size = bdrv_getlength(bs->file->bs); 2852 nb_clusters = size_to_clusters(s, size); 2853 for(k = 0; k < nb_clusters;) { 2854 k1 = k; 2855 refcount = get_refcount(bs, k); 2856 k++; 2857 while (k < nb_clusters && get_refcount(bs, k) == refcount) 2858 k++; 2859 printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount, 2860 k - k1); 2861 } 2862 } 2863 #endif 2864 2865 static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 2866 int64_t pos) 2867 { 2868 BDRVQcow2State *s = bs->opaque; 2869 int64_t total_sectors = bs->total_sectors; 2870 bool zero_beyond_eof = bs->zero_beyond_eof; 2871 int ret; 2872 2873 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); 2874 bs->zero_beyond_eof = false; 2875 ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov); 2876 bs->zero_beyond_eof = zero_beyond_eof; 2877 2878 /* bdrv_co_do_writev will have increased the total_sectors value to include 2879 * the VM state - the VM state is however not an actual part of the block 2880 * device, therefore, we need to restore the old value. */ 2881 bs->total_sectors = total_sectors; 2882 2883 return ret; 2884 } 2885 2886 static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, 2887 int64_t pos, int size) 2888 { 2889 BDRVQcow2State *s = bs->opaque; 2890 bool zero_beyond_eof = bs->zero_beyond_eof; 2891 int ret; 2892 2893 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); 2894 bs->zero_beyond_eof = false; 2895 ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size); 2896 bs->zero_beyond_eof = zero_beyond_eof; 2897 2898 return ret; 2899 } 2900 2901 /* 2902 * Downgrades an image's version. To achieve this, any incompatible features 2903 * have to be removed. 2904 */ 2905 static int qcow2_downgrade(BlockDriverState *bs, int target_version, 2906 BlockDriverAmendStatusCB *status_cb, void *cb_opaque) 2907 { 2908 BDRVQcow2State *s = bs->opaque; 2909 int current_version = s->qcow_version; 2910 int ret; 2911 2912 if (target_version == current_version) { 2913 return 0; 2914 } else if (target_version > current_version) { 2915 return -EINVAL; 2916 } else if (target_version != 2) { 2917 return -EINVAL; 2918 } 2919 2920 if (s->refcount_order != 4) { 2921 error_report("compat=0.10 requires refcount_bits=16"); 2922 return -ENOTSUP; 2923 } 2924 2925 /* clear incompatible features */ 2926 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 2927 ret = qcow2_mark_clean(bs); 2928 if (ret < 0) { 2929 return ret; 2930 } 2931 } 2932 2933 /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in 2934 * the first place; if that happens nonetheless, returning -ENOTSUP is the 2935 * best thing to do anyway */ 2936 2937 if (s->incompatible_features) { 2938 return -ENOTSUP; 2939 } 2940 2941 /* since we can ignore compatible features, we can set them to 0 as well */ 2942 s->compatible_features = 0; 2943 /* if lazy refcounts have been used, they have already been fixed through 2944 * clearing the dirty flag */ 2945 2946 /* clearing autoclear features is trivial */ 2947 s->autoclear_features = 0; 2948 2949 ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque); 2950 if (ret < 0) { 2951 return ret; 2952 } 2953 2954 s->qcow_version = target_version; 2955 ret = qcow2_update_header(bs); 2956 if (ret < 0) { 2957 s->qcow_version = current_version; 2958 return ret; 2959 } 2960 return 0; 2961 } 2962 2963 typedef enum Qcow2AmendOperation { 2964 /* This is the value Qcow2AmendHelperCBInfo::last_operation will be 2965 * statically initialized to so that the helper CB can discern the first 2966 * invocation from an operation change */ 2967 QCOW2_NO_OPERATION = 0, 2968 2969 QCOW2_CHANGING_REFCOUNT_ORDER, 2970 QCOW2_DOWNGRADING, 2971 } Qcow2AmendOperation; 2972 2973 typedef struct Qcow2AmendHelperCBInfo { 2974 /* The code coordinating the amend operations should only modify 2975 * these four fields; the rest will be managed by the CB */ 2976 BlockDriverAmendStatusCB *original_status_cb; 2977 void *original_cb_opaque; 2978 2979 Qcow2AmendOperation current_operation; 2980 2981 /* Total number of operations to perform (only set once) */ 2982 int total_operations; 2983 2984 /* The following fields are managed by the CB */ 2985 2986 /* Number of operations completed */ 2987 int operations_completed; 2988 2989 /* Cumulative offset of all completed operations */ 2990 int64_t offset_completed; 2991 2992 Qcow2AmendOperation last_operation; 2993 int64_t last_work_size; 2994 } Qcow2AmendHelperCBInfo; 2995 2996 static void qcow2_amend_helper_cb(BlockDriverState *bs, 2997 int64_t operation_offset, 2998 int64_t operation_work_size, void *opaque) 2999 { 3000 Qcow2AmendHelperCBInfo *info = opaque; 3001 int64_t current_work_size; 3002 int64_t projected_work_size; 3003 3004 if (info->current_operation != info->last_operation) { 3005 if (info->last_operation != QCOW2_NO_OPERATION) { 3006 info->offset_completed += info->last_work_size; 3007 info->operations_completed++; 3008 } 3009 3010 info->last_operation = info->current_operation; 3011 } 3012 3013 assert(info->total_operations > 0); 3014 assert(info->operations_completed < info->total_operations); 3015 3016 info->last_work_size = operation_work_size; 3017 3018 current_work_size = info->offset_completed + operation_work_size; 3019 3020 /* current_work_size is the total work size for (operations_completed + 1) 3021 * operations (which includes this one), so multiply it by the number of 3022 * operations not covered and divide it by the number of operations 3023 * covered to get a projection for the operations not covered */ 3024 projected_work_size = current_work_size * (info->total_operations - 3025 info->operations_completed - 1) 3026 / (info->operations_completed + 1); 3027 3028 info->original_status_cb(bs, info->offset_completed + operation_offset, 3029 current_work_size + projected_work_size, 3030 info->original_cb_opaque); 3031 } 3032 3033 static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, 3034 BlockDriverAmendStatusCB *status_cb, 3035 void *cb_opaque) 3036 { 3037 BDRVQcow2State *s = bs->opaque; 3038 int old_version = s->qcow_version, new_version = old_version; 3039 uint64_t new_size = 0; 3040 const char *backing_file = NULL, *backing_format = NULL; 3041 bool lazy_refcounts = s->use_lazy_refcounts; 3042 const char *compat = NULL; 3043 uint64_t cluster_size = s->cluster_size; 3044 bool encrypt; 3045 int refcount_bits = s->refcount_bits; 3046 int ret; 3047 QemuOptDesc *desc = opts->list->desc; 3048 Qcow2AmendHelperCBInfo helper_cb_info; 3049 3050 while (desc && desc->name) { 3051 if (!qemu_opt_find(opts, desc->name)) { 3052 /* only change explicitly defined options */ 3053 desc++; 3054 continue; 3055 } 3056 3057 if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) { 3058 compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL); 3059 if (!compat) { 3060 /* preserve default */ 3061 } else if (!strcmp(compat, "0.10")) { 3062 new_version = 2; 3063 } else if (!strcmp(compat, "1.1")) { 3064 new_version = 3; 3065 } else { 3066 error_report("Unknown compatibility level %s", compat); 3067 return -EINVAL; 3068 } 3069 } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) { 3070 error_report("Cannot change preallocation mode"); 3071 return -ENOTSUP; 3072 } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { 3073 new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 3074 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) { 3075 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 3076 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) { 3077 backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 3078 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) { 3079 encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, 3080 !!s->cipher); 3081 3082 if (encrypt != !!s->cipher) { 3083 error_report("Changing the encryption flag is not supported"); 3084 return -ENOTSUP; 3085 } 3086 } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { 3087 cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 3088 cluster_size); 3089 if (cluster_size != s->cluster_size) { 3090 error_report("Changing the cluster size is not supported"); 3091 return -ENOTSUP; 3092 } 3093 } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { 3094 lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, 3095 lazy_refcounts); 3096 } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { 3097 refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS, 3098 refcount_bits); 3099 3100 if (refcount_bits <= 0 || refcount_bits > 64 || 3101 !is_power_of_2(refcount_bits)) 3102 { 3103 error_report("Refcount width must be a power of two and may " 3104 "not exceed 64 bits"); 3105 return -EINVAL; 3106 } 3107 } else { 3108 /* if this point is reached, this probably means a new option was 3109 * added without having it covered here */ 3110 abort(); 3111 } 3112 3113 desc++; 3114 } 3115 3116 helper_cb_info = (Qcow2AmendHelperCBInfo){ 3117 .original_status_cb = status_cb, 3118 .original_cb_opaque = cb_opaque, 3119 .total_operations = (new_version < old_version) 3120 + (s->refcount_bits != refcount_bits) 3121 }; 3122 3123 /* Upgrade first (some features may require compat=1.1) */ 3124 if (new_version > old_version) { 3125 s->qcow_version = new_version; 3126 ret = qcow2_update_header(bs); 3127 if (ret < 0) { 3128 s->qcow_version = old_version; 3129 return ret; 3130 } 3131 } 3132 3133 if (s->refcount_bits != refcount_bits) { 3134 int refcount_order = ctz32(refcount_bits); 3135 Error *local_error = NULL; 3136 3137 if (new_version < 3 && refcount_bits != 16) { 3138 error_report("Different refcount widths than 16 bits require " 3139 "compatibility level 1.1 or above (use compat=1.1 or " 3140 "greater)"); 3141 return -EINVAL; 3142 } 3143 3144 helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER; 3145 ret = qcow2_change_refcount_order(bs, refcount_order, 3146 &qcow2_amend_helper_cb, 3147 &helper_cb_info, &local_error); 3148 if (ret < 0) { 3149 error_report_err(local_error); 3150 return ret; 3151 } 3152 } 3153 3154 if (backing_file || backing_format) { 3155 ret = qcow2_change_backing_file(bs, 3156 backing_file ?: s->image_backing_file, 3157 backing_format ?: s->image_backing_format); 3158 if (ret < 0) { 3159 return ret; 3160 } 3161 } 3162 3163 if (s->use_lazy_refcounts != lazy_refcounts) { 3164 if (lazy_refcounts) { 3165 if (new_version < 3) { 3166 error_report("Lazy refcounts only supported with compatibility " 3167 "level 1.1 and above (use compat=1.1 or greater)"); 3168 return -EINVAL; 3169 } 3170 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 3171 ret = qcow2_update_header(bs); 3172 if (ret < 0) { 3173 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 3174 return ret; 3175 } 3176 s->use_lazy_refcounts = true; 3177 } else { 3178 /* make image clean first */ 3179 ret = qcow2_mark_clean(bs); 3180 if (ret < 0) { 3181 return ret; 3182 } 3183 /* now disallow lazy refcounts */ 3184 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 3185 ret = qcow2_update_header(bs); 3186 if (ret < 0) { 3187 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 3188 return ret; 3189 } 3190 s->use_lazy_refcounts = false; 3191 } 3192 } 3193 3194 if (new_size) { 3195 ret = bdrv_truncate(bs, new_size); 3196 if (ret < 0) { 3197 return ret; 3198 } 3199 } 3200 3201 /* Downgrade last (so unsupported features can be removed before) */ 3202 if (new_version < old_version) { 3203 helper_cb_info.current_operation = QCOW2_DOWNGRADING; 3204 ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb, 3205 &helper_cb_info); 3206 if (ret < 0) { 3207 return ret; 3208 } 3209 } 3210 3211 return 0; 3212 } 3213 3214 /* 3215 * If offset or size are negative, respectively, they will not be included in 3216 * the BLOCK_IMAGE_CORRUPTED event emitted. 3217 * fatal will be ignored for read-only BDS; corruptions found there will always 3218 * be considered non-fatal. 3219 */ 3220 void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, 3221 int64_t size, const char *message_format, ...) 3222 { 3223 BDRVQcow2State *s = bs->opaque; 3224 const char *node_name; 3225 char *message; 3226 va_list ap; 3227 3228 fatal = fatal && !bs->read_only; 3229 3230 if (s->signaled_corruption && 3231 (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT))) 3232 { 3233 return; 3234 } 3235 3236 va_start(ap, message_format); 3237 message = g_strdup_vprintf(message_format, ap); 3238 va_end(ap); 3239 3240 if (fatal) { 3241 fprintf(stderr, "qcow2: Marking image as corrupt: %s; further " 3242 "corruption events will be suppressed\n", message); 3243 } else { 3244 fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal " 3245 "corruption events will be suppressed\n", message); 3246 } 3247 3248 node_name = bdrv_get_node_name(bs); 3249 qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), 3250 *node_name != '\0', node_name, 3251 message, offset >= 0, offset, 3252 size >= 0, size, 3253 fatal, &error_abort); 3254 g_free(message); 3255 3256 if (fatal) { 3257 qcow2_mark_corrupt(bs); 3258 bs->drv = NULL; /* make BDS unusable */ 3259 } 3260 3261 s->signaled_corruption = true; 3262 } 3263 3264 static QemuOptsList qcow2_create_opts = { 3265 .name = "qcow2-create-opts", 3266 .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head), 3267 .desc = { 3268 { 3269 .name = BLOCK_OPT_SIZE, 3270 .type = QEMU_OPT_SIZE, 3271 .help = "Virtual disk size" 3272 }, 3273 { 3274 .name = BLOCK_OPT_COMPAT_LEVEL, 3275 .type = QEMU_OPT_STRING, 3276 .help = "Compatibility level (0.10 or 1.1)" 3277 }, 3278 { 3279 .name = BLOCK_OPT_BACKING_FILE, 3280 .type = QEMU_OPT_STRING, 3281 .help = "File name of a base image" 3282 }, 3283 { 3284 .name = BLOCK_OPT_BACKING_FMT, 3285 .type = QEMU_OPT_STRING, 3286 .help = "Image format of the base image" 3287 }, 3288 { 3289 .name = BLOCK_OPT_ENCRYPT, 3290 .type = QEMU_OPT_BOOL, 3291 .help = "Encrypt the image", 3292 .def_value_str = "off" 3293 }, 3294 { 3295 .name = BLOCK_OPT_CLUSTER_SIZE, 3296 .type = QEMU_OPT_SIZE, 3297 .help = "qcow2 cluster size", 3298 .def_value_str = stringify(DEFAULT_CLUSTER_SIZE) 3299 }, 3300 { 3301 .name = BLOCK_OPT_PREALLOC, 3302 .type = QEMU_OPT_STRING, 3303 .help = "Preallocation mode (allowed values: off, metadata, " 3304 "falloc, full)" 3305 }, 3306 { 3307 .name = BLOCK_OPT_LAZY_REFCOUNTS, 3308 .type = QEMU_OPT_BOOL, 3309 .help = "Postpone refcount updates", 3310 .def_value_str = "off" 3311 }, 3312 { 3313 .name = BLOCK_OPT_REFCOUNT_BITS, 3314 .type = QEMU_OPT_NUMBER, 3315 .help = "Width of a reference count entry in bits", 3316 .def_value_str = "16" 3317 }, 3318 { /* end of list */ } 3319 } 3320 }; 3321 3322 BlockDriver bdrv_qcow2 = { 3323 .format_name = "qcow2", 3324 .instance_size = sizeof(BDRVQcow2State), 3325 .bdrv_probe = qcow2_probe, 3326 .bdrv_open = qcow2_open, 3327 .bdrv_close = qcow2_close, 3328 .bdrv_reopen_prepare = qcow2_reopen_prepare, 3329 .bdrv_reopen_commit = qcow2_reopen_commit, 3330 .bdrv_reopen_abort = qcow2_reopen_abort, 3331 .bdrv_join_options = qcow2_join_options, 3332 .bdrv_create = qcow2_create, 3333 .bdrv_has_zero_init = bdrv_has_zero_init_1, 3334 .bdrv_co_get_block_status = qcow2_co_get_block_status, 3335 .bdrv_set_key = qcow2_set_key, 3336 3337 .bdrv_co_readv = qcow2_co_readv, 3338 .bdrv_co_writev = qcow2_co_writev, 3339 .bdrv_co_flush_to_os = qcow2_co_flush_to_os, 3340 3341 .bdrv_co_write_zeroes = qcow2_co_write_zeroes, 3342 .bdrv_co_discard = qcow2_co_discard, 3343 .bdrv_truncate = qcow2_truncate, 3344 .bdrv_write_compressed = qcow2_write_compressed, 3345 .bdrv_make_empty = qcow2_make_empty, 3346 3347 .bdrv_snapshot_create = qcow2_snapshot_create, 3348 .bdrv_snapshot_goto = qcow2_snapshot_goto, 3349 .bdrv_snapshot_delete = qcow2_snapshot_delete, 3350 .bdrv_snapshot_list = qcow2_snapshot_list, 3351 .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, 3352 .bdrv_get_info = qcow2_get_info, 3353 .bdrv_get_specific_info = qcow2_get_specific_info, 3354 3355 .bdrv_save_vmstate = qcow2_save_vmstate, 3356 .bdrv_load_vmstate = qcow2_load_vmstate, 3357 3358 .supports_backing = true, 3359 .bdrv_change_backing_file = qcow2_change_backing_file, 3360 3361 .bdrv_refresh_limits = qcow2_refresh_limits, 3362 .bdrv_invalidate_cache = qcow2_invalidate_cache, 3363 .bdrv_inactivate = qcow2_inactivate, 3364 3365 .create_opts = &qcow2_create_opts, 3366 .bdrv_check = qcow2_check, 3367 .bdrv_amend_options = qcow2_amend_options, 3368 3369 .bdrv_detach_aio_context = qcow2_detach_aio_context, 3370 .bdrv_attach_aio_context = qcow2_attach_aio_context, 3371 }; 3372 3373 static void bdrv_qcow2_init(void) 3374 { 3375 bdrv_register(&bdrv_qcow2); 3376 } 3377 3378 block_init(bdrv_qcow2_init); 3379