1 /* 2 * Block driver for the QCOW version 2 format 3 * 4 * Copyright (c) 2004-2006 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "qemu/osdep.h" 26 #include "block/block_int.h" 27 #include "sysemu/block-backend.h" 28 #include "qemu/module.h" 29 #include <zlib.h> 30 #include "block/qcow2.h" 31 #include "qemu/error-report.h" 32 #include "qapi/error.h" 33 #include "qapi/qapi-events-block-core.h" 34 #include "qapi/qmp/qdict.h" 35 #include "qapi/qmp/qstring.h" 36 #include "trace.h" 37 #include "qemu/option_int.h" 38 #include "qemu/cutils.h" 39 #include "qemu/bswap.h" 40 #include "qapi/qobject-input-visitor.h" 41 #include "qapi/qapi-visit-block-core.h" 42 #include "block/crypto.h" 43 44 /* 45 Differences with QCOW: 46 47 - Support for multiple incremental snapshots. 48 - Memory management by reference counts. 49 - Clusters which have a reference count of one have the bit 50 QCOW_OFLAG_COPIED to optimize write performance. 51 - Size of compressed clusters is stored in sectors to reduce bit usage 52 in the cluster offsets. 53 - Support for storing additional data (such as the VM state) in the 54 snapshots. 55 - If a backing store is used, the cluster size is not constrained 56 (could be backported to QCOW). 57 - L2 tables have always a size of one cluster. 58 */ 59 60 61 typedef struct { 62 uint32_t magic; 63 uint32_t len; 64 } QEMU_PACKED QCowExtension; 65 66 #define QCOW2_EXT_MAGIC_END 0 67 #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA 68 #define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 69 #define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77 70 #define QCOW2_EXT_MAGIC_BITMAPS 0x23852875 71 72 static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) 73 { 74 const QCowHeader *cow_header = (const void *)buf; 75 76 if (buf_size >= sizeof(QCowHeader) && 77 be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 78 be32_to_cpu(cow_header->version) >= 2) 79 return 100; 80 else 81 return 0; 82 } 83 84 85 static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset, 86 uint8_t *buf, size_t buflen, 87 void *opaque, Error **errp) 88 { 89 BlockDriverState *bs = opaque; 90 BDRVQcow2State *s = bs->opaque; 91 ssize_t ret; 92 93 if ((offset + buflen) > s->crypto_header.length) { 94 error_setg(errp, "Request for data outside of extension header"); 95 return -1; 96 } 97 98 ret = bdrv_pread(bs->file, 99 s->crypto_header.offset + offset, buf, buflen); 100 if (ret < 0) { 101 error_setg_errno(errp, -ret, "Could not read encryption header"); 102 return -1; 103 } 104 return ret; 105 } 106 107 108 static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, 109 void *opaque, Error **errp) 110 { 111 BlockDriverState *bs = opaque; 112 BDRVQcow2State *s = bs->opaque; 113 int64_t ret; 114 int64_t clusterlen; 115 116 ret = qcow2_alloc_clusters(bs, headerlen); 117 if (ret < 0) { 118 error_setg_errno(errp, -ret, 119 "Cannot allocate cluster for LUKS header size %zu", 120 headerlen); 121 return -1; 122 } 123 124 s->crypto_header.length = headerlen; 125 s->crypto_header.offset = ret; 126 127 /* Zero fill remaining space in cluster so it has predictable 128 * content in case of future spec changes */ 129 clusterlen = size_to_clusters(s, headerlen) * s->cluster_size; 130 assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen) == 0); 131 ret = bdrv_pwrite_zeroes(bs->file, 132 ret + headerlen, 133 clusterlen - headerlen, 0); 134 if (ret < 0) { 135 error_setg_errno(errp, -ret, "Could not zero fill encryption header"); 136 return -1; 137 } 138 139 return ret; 140 } 141 142 143 static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset, 144 const uint8_t *buf, size_t buflen, 145 void *opaque, Error **errp) 146 { 147 BlockDriverState *bs = opaque; 148 BDRVQcow2State *s = bs->opaque; 149 ssize_t ret; 150 151 if ((offset + buflen) > s->crypto_header.length) { 152 error_setg(errp, "Request for data outside of extension header"); 153 return -1; 154 } 155 156 ret = bdrv_pwrite(bs->file, 157 s->crypto_header.offset + offset, buf, buflen); 158 if (ret < 0) { 159 error_setg_errno(errp, -ret, "Could not read encryption header"); 160 return -1; 161 } 162 return ret; 163 } 164 165 166 /* 167 * read qcow2 extension and fill bs 168 * start reading from start_offset 169 * finish reading upon magic of value 0 or when end_offset reached 170 * unknown magic is skipped (future extension this version knows nothing about) 171 * return 0 upon success, non-0 otherwise 172 */ 173 static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, 174 uint64_t end_offset, void **p_feature_table, 175 int flags, bool *need_update_header, 176 Error **errp) 177 { 178 BDRVQcow2State *s = bs->opaque; 179 QCowExtension ext; 180 uint64_t offset; 181 int ret; 182 Qcow2BitmapHeaderExt bitmaps_ext; 183 184 if (need_update_header != NULL) { 185 *need_update_header = false; 186 } 187 188 #ifdef DEBUG_EXT 189 printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); 190 #endif 191 offset = start_offset; 192 while (offset < end_offset) { 193 194 #ifdef DEBUG_EXT 195 /* Sanity check */ 196 if (offset > s->cluster_size) 197 printf("qcow2_read_extension: suspicious offset %lu\n", offset); 198 199 printf("attempting to read extended header in offset %lu\n", offset); 200 #endif 201 202 ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext)); 203 if (ret < 0) { 204 error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: " 205 "pread fail from offset %" PRIu64, offset); 206 return 1; 207 } 208 be32_to_cpus(&ext.magic); 209 be32_to_cpus(&ext.len); 210 offset += sizeof(ext); 211 #ifdef DEBUG_EXT 212 printf("ext.magic = 0x%x\n", ext.magic); 213 #endif 214 if (offset > end_offset || ext.len > end_offset - offset) { 215 error_setg(errp, "Header extension too large"); 216 return -EINVAL; 217 } 218 219 switch (ext.magic) { 220 case QCOW2_EXT_MAGIC_END: 221 return 0; 222 223 case QCOW2_EXT_MAGIC_BACKING_FORMAT: 224 if (ext.len >= sizeof(bs->backing_format)) { 225 error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32 226 " too large (>=%zu)", ext.len, 227 sizeof(bs->backing_format)); 228 return 2; 229 } 230 ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len); 231 if (ret < 0) { 232 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: " 233 "Could not read format name"); 234 return 3; 235 } 236 bs->backing_format[ext.len] = '\0'; 237 s->image_backing_format = g_strdup(bs->backing_format); 238 #ifdef DEBUG_EXT 239 printf("Qcow2: Got format extension %s\n", bs->backing_format); 240 #endif 241 break; 242 243 case QCOW2_EXT_MAGIC_FEATURE_TABLE: 244 if (p_feature_table != NULL) { 245 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); 246 ret = bdrv_pread(bs->file, offset , feature_table, ext.len); 247 if (ret < 0) { 248 error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " 249 "Could not read table"); 250 return ret; 251 } 252 253 *p_feature_table = feature_table; 254 } 255 break; 256 257 case QCOW2_EXT_MAGIC_CRYPTO_HEADER: { 258 unsigned int cflags = 0; 259 if (s->crypt_method_header != QCOW_CRYPT_LUKS) { 260 error_setg(errp, "CRYPTO header extension only " 261 "expected with LUKS encryption method"); 262 return -EINVAL; 263 } 264 if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) { 265 error_setg(errp, "CRYPTO header extension size %u, " 266 "but expected size %zu", ext.len, 267 sizeof(Qcow2CryptoHeaderExtension)); 268 return -EINVAL; 269 } 270 271 ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len); 272 if (ret < 0) { 273 error_setg_errno(errp, -ret, 274 "Unable to read CRYPTO header extension"); 275 return ret; 276 } 277 be64_to_cpus(&s->crypto_header.offset); 278 be64_to_cpus(&s->crypto_header.length); 279 280 if ((s->crypto_header.offset % s->cluster_size) != 0) { 281 error_setg(errp, "Encryption header offset '%" PRIu64 "' is " 282 "not a multiple of cluster size '%u'", 283 s->crypto_header.offset, s->cluster_size); 284 return -EINVAL; 285 } 286 287 if (flags & BDRV_O_NO_IO) { 288 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; 289 } 290 s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", 291 qcow2_crypto_hdr_read_func, 292 bs, cflags, errp); 293 if (!s->crypto) { 294 return -EINVAL; 295 } 296 } break; 297 298 case QCOW2_EXT_MAGIC_BITMAPS: 299 if (ext.len != sizeof(bitmaps_ext)) { 300 error_setg_errno(errp, -ret, "bitmaps_ext: " 301 "Invalid extension length"); 302 return -EINVAL; 303 } 304 305 if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) { 306 if (s->qcow_version < 3) { 307 /* Let's be a bit more specific */ 308 warn_report("This qcow2 v2 image contains bitmaps, but " 309 "they may have been modified by a program " 310 "without persistent bitmap support; so now " 311 "they must all be considered inconsistent"); 312 } else { 313 warn_report("a program lacking bitmap support " 314 "modified this file, so all bitmaps are now " 315 "considered inconsistent"); 316 } 317 error_printf("Some clusters may be leaked, " 318 "run 'qemu-img check -r' on the image " 319 "file to fix."); 320 if (need_update_header != NULL) { 321 /* Updating is needed to drop invalid bitmap extension. */ 322 *need_update_header = true; 323 } 324 break; 325 } 326 327 ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len); 328 if (ret < 0) { 329 error_setg_errno(errp, -ret, "bitmaps_ext: " 330 "Could not read ext header"); 331 return ret; 332 } 333 334 if (bitmaps_ext.reserved32 != 0) { 335 error_setg_errno(errp, -ret, "bitmaps_ext: " 336 "Reserved field is not zero"); 337 return -EINVAL; 338 } 339 340 be32_to_cpus(&bitmaps_ext.nb_bitmaps); 341 be64_to_cpus(&bitmaps_ext.bitmap_directory_size); 342 be64_to_cpus(&bitmaps_ext.bitmap_directory_offset); 343 344 if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) { 345 error_setg(errp, 346 "bitmaps_ext: Image has %" PRIu32 " bitmaps, " 347 "exceeding the QEMU supported maximum of %d", 348 bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS); 349 return -EINVAL; 350 } 351 352 if (bitmaps_ext.nb_bitmaps == 0) { 353 error_setg(errp, "found bitmaps extension with zero bitmaps"); 354 return -EINVAL; 355 } 356 357 if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) { 358 error_setg(errp, "bitmaps_ext: " 359 "invalid bitmap directory offset"); 360 return -EINVAL; 361 } 362 363 if (bitmaps_ext.bitmap_directory_size > 364 QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { 365 error_setg(errp, "bitmaps_ext: " 366 "bitmap directory size (%" PRIu64 ") exceeds " 367 "the maximum supported size (%d)", 368 bitmaps_ext.bitmap_directory_size, 369 QCOW2_MAX_BITMAP_DIRECTORY_SIZE); 370 return -EINVAL; 371 } 372 373 s->nb_bitmaps = bitmaps_ext.nb_bitmaps; 374 s->bitmap_directory_offset = 375 bitmaps_ext.bitmap_directory_offset; 376 s->bitmap_directory_size = 377 bitmaps_ext.bitmap_directory_size; 378 379 #ifdef DEBUG_EXT 380 printf("Qcow2: Got bitmaps extension: " 381 "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n", 382 s->bitmap_directory_offset, s->nb_bitmaps); 383 #endif 384 break; 385 386 default: 387 /* unknown magic - save it in case we need to rewrite the header */ 388 /* If you add a new feature, make sure to also update the fast 389 * path of qcow2_make_empty() to deal with it. */ 390 { 391 Qcow2UnknownHeaderExtension *uext; 392 393 uext = g_malloc0(sizeof(*uext) + ext.len); 394 uext->magic = ext.magic; 395 uext->len = ext.len; 396 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); 397 398 ret = bdrv_pread(bs->file, offset , uext->data, uext->len); 399 if (ret < 0) { 400 error_setg_errno(errp, -ret, "ERROR: unknown extension: " 401 "Could not read data"); 402 return ret; 403 } 404 } 405 break; 406 } 407 408 offset += ((ext.len + 7) & ~7); 409 } 410 411 return 0; 412 } 413 414 static void cleanup_unknown_header_ext(BlockDriverState *bs) 415 { 416 BDRVQcow2State *s = bs->opaque; 417 Qcow2UnknownHeaderExtension *uext, *next; 418 419 QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { 420 QLIST_REMOVE(uext, next); 421 g_free(uext); 422 } 423 } 424 425 static void report_unsupported_feature(Error **errp, Qcow2Feature *table, 426 uint64_t mask) 427 { 428 char *features = g_strdup(""); 429 char *old; 430 431 while (table && table->name[0] != '\0') { 432 if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) { 433 if (mask & (1ULL << table->bit)) { 434 old = features; 435 features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "", 436 table->name); 437 g_free(old); 438 mask &= ~(1ULL << table->bit); 439 } 440 } 441 table++; 442 } 443 444 if (mask) { 445 old = features; 446 features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64, 447 old, *old ? ", " : "", mask); 448 g_free(old); 449 } 450 451 error_setg(errp, "Unsupported qcow2 feature(s): %s", features); 452 g_free(features); 453 } 454 455 /* 456 * Sets the dirty bit and flushes afterwards if necessary. 457 * 458 * The incompatible_features bit is only set if the image file header was 459 * updated successfully. Therefore it is not required to check the return 460 * value of this function. 461 */ 462 int qcow2_mark_dirty(BlockDriverState *bs) 463 { 464 BDRVQcow2State *s = bs->opaque; 465 uint64_t val; 466 int ret; 467 468 assert(s->qcow_version >= 3); 469 470 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 471 return 0; /* already dirty */ 472 } 473 474 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY); 475 ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features), 476 &val, sizeof(val)); 477 if (ret < 0) { 478 return ret; 479 } 480 ret = bdrv_flush(bs->file->bs); 481 if (ret < 0) { 482 return ret; 483 } 484 485 /* Only treat image as dirty if the header was updated successfully */ 486 s->incompatible_features |= QCOW2_INCOMPAT_DIRTY; 487 return 0; 488 } 489 490 /* 491 * Clears the dirty bit and flushes before if necessary. Only call this 492 * function when there are no pending requests, it does not guard against 493 * concurrent requests dirtying the image. 494 */ 495 static int qcow2_mark_clean(BlockDriverState *bs) 496 { 497 BDRVQcow2State *s = bs->opaque; 498 499 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 500 int ret; 501 502 s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY; 503 504 ret = qcow2_flush_caches(bs); 505 if (ret < 0) { 506 return ret; 507 } 508 509 return qcow2_update_header(bs); 510 } 511 return 0; 512 } 513 514 /* 515 * Marks the image as corrupt. 516 */ 517 int qcow2_mark_corrupt(BlockDriverState *bs) 518 { 519 BDRVQcow2State *s = bs->opaque; 520 521 s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT; 522 return qcow2_update_header(bs); 523 } 524 525 /* 526 * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes 527 * before if necessary. 528 */ 529 int qcow2_mark_consistent(BlockDriverState *bs) 530 { 531 BDRVQcow2State *s = bs->opaque; 532 533 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 534 int ret = qcow2_flush_caches(bs); 535 if (ret < 0) { 536 return ret; 537 } 538 539 s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT; 540 return qcow2_update_header(bs); 541 } 542 return 0; 543 } 544 545 static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs, 546 BdrvCheckResult *result, 547 BdrvCheckMode fix) 548 { 549 int ret = qcow2_check_refcounts(bs, result, fix); 550 if (ret < 0) { 551 return ret; 552 } 553 554 if (fix && result->check_errors == 0 && result->corruptions == 0) { 555 ret = qcow2_mark_clean(bs); 556 if (ret < 0) { 557 return ret; 558 } 559 return qcow2_mark_consistent(bs); 560 } 561 return ret; 562 } 563 564 static int coroutine_fn qcow2_co_check(BlockDriverState *bs, 565 BdrvCheckResult *result, 566 BdrvCheckMode fix) 567 { 568 BDRVQcow2State *s = bs->opaque; 569 int ret; 570 571 qemu_co_mutex_lock(&s->lock); 572 ret = qcow2_co_check_locked(bs, result, fix); 573 qemu_co_mutex_unlock(&s->lock); 574 return ret; 575 } 576 577 int qcow2_validate_table(BlockDriverState *bs, uint64_t offset, 578 uint64_t entries, size_t entry_len, 579 int64_t max_size_bytes, const char *table_name, 580 Error **errp) 581 { 582 BDRVQcow2State *s = bs->opaque; 583 584 if (entries > max_size_bytes / entry_len) { 585 error_setg(errp, "%s too large", table_name); 586 return -EFBIG; 587 } 588 589 /* Use signed INT64_MAX as the maximum even for uint64_t header fields, 590 * because values will be passed to qemu functions taking int64_t. */ 591 if ((INT64_MAX - entries * entry_len < offset) || 592 (offset_into_cluster(s, offset) != 0)) { 593 error_setg(errp, "%s offset invalid", table_name); 594 return -EINVAL; 595 } 596 597 return 0; 598 } 599 600 static QemuOptsList qcow2_runtime_opts = { 601 .name = "qcow2", 602 .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head), 603 .desc = { 604 { 605 .name = QCOW2_OPT_LAZY_REFCOUNTS, 606 .type = QEMU_OPT_BOOL, 607 .help = "Postpone refcount updates", 608 }, 609 { 610 .name = QCOW2_OPT_DISCARD_REQUEST, 611 .type = QEMU_OPT_BOOL, 612 .help = "Pass guest discard requests to the layer below", 613 }, 614 { 615 .name = QCOW2_OPT_DISCARD_SNAPSHOT, 616 .type = QEMU_OPT_BOOL, 617 .help = "Generate discard requests when snapshot related space " 618 "is freed", 619 }, 620 { 621 .name = QCOW2_OPT_DISCARD_OTHER, 622 .type = QEMU_OPT_BOOL, 623 .help = "Generate discard requests when other clusters are freed", 624 }, 625 { 626 .name = QCOW2_OPT_OVERLAP, 627 .type = QEMU_OPT_STRING, 628 .help = "Selects which overlap checks to perform from a range of " 629 "templates (none, constant, cached, all)", 630 }, 631 { 632 .name = QCOW2_OPT_OVERLAP_TEMPLATE, 633 .type = QEMU_OPT_STRING, 634 .help = "Selects which overlap checks to perform from a range of " 635 "templates (none, constant, cached, all)", 636 }, 637 { 638 .name = QCOW2_OPT_OVERLAP_MAIN_HEADER, 639 .type = QEMU_OPT_BOOL, 640 .help = "Check for unintended writes into the main qcow2 header", 641 }, 642 { 643 .name = QCOW2_OPT_OVERLAP_ACTIVE_L1, 644 .type = QEMU_OPT_BOOL, 645 .help = "Check for unintended writes into the active L1 table", 646 }, 647 { 648 .name = QCOW2_OPT_OVERLAP_ACTIVE_L2, 649 .type = QEMU_OPT_BOOL, 650 .help = "Check for unintended writes into an active L2 table", 651 }, 652 { 653 .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 654 .type = QEMU_OPT_BOOL, 655 .help = "Check for unintended writes into the refcount table", 656 }, 657 { 658 .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, 659 .type = QEMU_OPT_BOOL, 660 .help = "Check for unintended writes into a refcount block", 661 }, 662 { 663 .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, 664 .type = QEMU_OPT_BOOL, 665 .help = "Check for unintended writes into the snapshot table", 666 }, 667 { 668 .name = QCOW2_OPT_OVERLAP_INACTIVE_L1, 669 .type = QEMU_OPT_BOOL, 670 .help = "Check for unintended writes into an inactive L1 table", 671 }, 672 { 673 .name = QCOW2_OPT_OVERLAP_INACTIVE_L2, 674 .type = QEMU_OPT_BOOL, 675 .help = "Check for unintended writes into an inactive L2 table", 676 }, 677 { 678 .name = QCOW2_OPT_CACHE_SIZE, 679 .type = QEMU_OPT_SIZE, 680 .help = "Maximum combined metadata (L2 tables and refcount blocks) " 681 "cache size", 682 }, 683 { 684 .name = QCOW2_OPT_L2_CACHE_SIZE, 685 .type = QEMU_OPT_SIZE, 686 .help = "Maximum L2 table cache size", 687 }, 688 { 689 .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE, 690 .type = QEMU_OPT_SIZE, 691 .help = "Size of each entry in the L2 cache", 692 }, 693 { 694 .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE, 695 .type = QEMU_OPT_SIZE, 696 .help = "Maximum refcount block cache size", 697 }, 698 { 699 .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL, 700 .type = QEMU_OPT_NUMBER, 701 .help = "Clean unused cache entries after this time (in seconds)", 702 }, 703 BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", 704 "ID of secret providing qcow2 AES key or LUKS passphrase"), 705 { /* end of list */ } 706 }, 707 }; 708 709 static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { 710 [QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER, 711 [QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1, 712 [QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2, 713 [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 714 [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, 715 [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, 716 [QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1, 717 [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, 718 }; 719 720 static void cache_clean_timer_cb(void *opaque) 721 { 722 BlockDriverState *bs = opaque; 723 BDRVQcow2State *s = bs->opaque; 724 qcow2_cache_clean_unused(s->l2_table_cache); 725 qcow2_cache_clean_unused(s->refcount_block_cache); 726 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 727 (int64_t) s->cache_clean_interval * 1000); 728 } 729 730 static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) 731 { 732 BDRVQcow2State *s = bs->opaque; 733 if (s->cache_clean_interval > 0) { 734 s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL, 735 SCALE_MS, cache_clean_timer_cb, 736 bs); 737 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 738 (int64_t) s->cache_clean_interval * 1000); 739 } 740 } 741 742 static void cache_clean_timer_del(BlockDriverState *bs) 743 { 744 BDRVQcow2State *s = bs->opaque; 745 if (s->cache_clean_timer) { 746 timer_del(s->cache_clean_timer); 747 timer_free(s->cache_clean_timer); 748 s->cache_clean_timer = NULL; 749 } 750 } 751 752 static void qcow2_detach_aio_context(BlockDriverState *bs) 753 { 754 cache_clean_timer_del(bs); 755 } 756 757 static void qcow2_attach_aio_context(BlockDriverState *bs, 758 AioContext *new_context) 759 { 760 cache_clean_timer_init(bs, new_context); 761 } 762 763 static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, 764 uint64_t *l2_cache_size, 765 uint64_t *l2_cache_entry_size, 766 uint64_t *refcount_cache_size, Error **errp) 767 { 768 BDRVQcow2State *s = bs->opaque; 769 uint64_t combined_cache_size; 770 bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; 771 772 combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); 773 l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE); 774 refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 775 776 combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0); 777 *l2_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 0); 778 *refcount_cache_size = qemu_opt_get_size(opts, 779 QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0); 780 781 *l2_cache_entry_size = qemu_opt_get_size( 782 opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size); 783 784 if (combined_cache_size_set) { 785 if (l2_cache_size_set && refcount_cache_size_set) { 786 error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE 787 " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set " 788 "the same time"); 789 return; 790 } else if (*l2_cache_size > combined_cache_size) { 791 error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed " 792 QCOW2_OPT_CACHE_SIZE); 793 return; 794 } else if (*refcount_cache_size > combined_cache_size) { 795 error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed " 796 QCOW2_OPT_CACHE_SIZE); 797 return; 798 } 799 800 if (l2_cache_size_set) { 801 *refcount_cache_size = combined_cache_size - *l2_cache_size; 802 } else if (refcount_cache_size_set) { 803 *l2_cache_size = combined_cache_size - *refcount_cache_size; 804 } else { 805 uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; 806 uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); 807 uint64_t min_refcount_cache = 808 (uint64_t) MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; 809 810 /* Assign as much memory as possible to the L2 cache, and 811 * use the remainder for the refcount cache */ 812 if (combined_cache_size >= max_l2_cache + min_refcount_cache) { 813 *l2_cache_size = max_l2_cache; 814 *refcount_cache_size = combined_cache_size - *l2_cache_size; 815 } else { 816 *refcount_cache_size = 817 MIN(combined_cache_size, min_refcount_cache); 818 *l2_cache_size = combined_cache_size - *refcount_cache_size; 819 } 820 } 821 } else { 822 if (!l2_cache_size_set) { 823 *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE, 824 (uint64_t)DEFAULT_L2_CACHE_CLUSTERS 825 * s->cluster_size); 826 } 827 if (!refcount_cache_size_set) { 828 *refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; 829 } 830 } 831 832 if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) || 833 *l2_cache_entry_size > s->cluster_size || 834 !is_power_of_2(*l2_cache_entry_size)) { 835 error_setg(errp, "L2 cache entry size must be a power of two " 836 "between %d and the cluster size (%d)", 837 1 << MIN_CLUSTER_BITS, s->cluster_size); 838 return; 839 } 840 } 841 842 typedef struct Qcow2ReopenState { 843 Qcow2Cache *l2_table_cache; 844 Qcow2Cache *refcount_block_cache; 845 int l2_slice_size; /* Number of entries in a slice of the L2 table */ 846 bool use_lazy_refcounts; 847 int overlap_check; 848 bool discard_passthrough[QCOW2_DISCARD_MAX]; 849 uint64_t cache_clean_interval; 850 QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */ 851 } Qcow2ReopenState; 852 853 static int qcow2_update_options_prepare(BlockDriverState *bs, 854 Qcow2ReopenState *r, 855 QDict *options, int flags, 856 Error **errp) 857 { 858 BDRVQcow2State *s = bs->opaque; 859 QemuOpts *opts = NULL; 860 const char *opt_overlap_check, *opt_overlap_check_template; 861 int overlap_check_template = 0; 862 uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size; 863 int i; 864 const char *encryptfmt; 865 QDict *encryptopts = NULL; 866 Error *local_err = NULL; 867 int ret; 868 869 qdict_extract_subqdict(options, &encryptopts, "encrypt."); 870 encryptfmt = qdict_get_try_str(encryptopts, "format"); 871 872 opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort); 873 qemu_opts_absorb_qdict(opts, options, &local_err); 874 if (local_err) { 875 error_propagate(errp, local_err); 876 ret = -EINVAL; 877 goto fail; 878 } 879 880 /* get L2 table/refcount block cache size from command line options */ 881 read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size, 882 &refcount_cache_size, &local_err); 883 if (local_err) { 884 error_propagate(errp, local_err); 885 ret = -EINVAL; 886 goto fail; 887 } 888 889 l2_cache_size /= l2_cache_entry_size; 890 if (l2_cache_size < MIN_L2_CACHE_SIZE) { 891 l2_cache_size = MIN_L2_CACHE_SIZE; 892 } 893 if (l2_cache_size > INT_MAX) { 894 error_setg(errp, "L2 cache size too big"); 895 ret = -EINVAL; 896 goto fail; 897 } 898 899 refcount_cache_size /= s->cluster_size; 900 if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) { 901 refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE; 902 } 903 if (refcount_cache_size > INT_MAX) { 904 error_setg(errp, "Refcount cache size too big"); 905 ret = -EINVAL; 906 goto fail; 907 } 908 909 /* alloc new L2 table/refcount block cache, flush old one */ 910 if (s->l2_table_cache) { 911 ret = qcow2_cache_flush(bs, s->l2_table_cache); 912 if (ret) { 913 error_setg_errno(errp, -ret, "Failed to flush the L2 table cache"); 914 goto fail; 915 } 916 } 917 918 if (s->refcount_block_cache) { 919 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 920 if (ret) { 921 error_setg_errno(errp, -ret, 922 "Failed to flush the refcount block cache"); 923 goto fail; 924 } 925 } 926 927 r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t); 928 r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size, 929 l2_cache_entry_size); 930 r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size, 931 s->cluster_size); 932 if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) { 933 error_setg(errp, "Could not allocate metadata caches"); 934 ret = -ENOMEM; 935 goto fail; 936 } 937 938 /* New interval for cache cleanup timer */ 939 r->cache_clean_interval = 940 qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 941 s->cache_clean_interval); 942 #ifndef CONFIG_LINUX 943 if (r->cache_clean_interval != 0) { 944 error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL 945 " not supported on this host"); 946 ret = -EINVAL; 947 goto fail; 948 } 949 #endif 950 if (r->cache_clean_interval > UINT_MAX) { 951 error_setg(errp, "Cache clean interval too big"); 952 ret = -EINVAL; 953 goto fail; 954 } 955 956 /* lazy-refcounts; flush if going from enabled to disabled */ 957 r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS, 958 (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)); 959 if (r->use_lazy_refcounts && s->qcow_version < 3) { 960 error_setg(errp, "Lazy refcounts require a qcow2 image with at least " 961 "qemu 1.1 compatibility level"); 962 ret = -EINVAL; 963 goto fail; 964 } 965 966 if (s->use_lazy_refcounts && !r->use_lazy_refcounts) { 967 ret = qcow2_mark_clean(bs); 968 if (ret < 0) { 969 error_setg_errno(errp, -ret, "Failed to disable lazy refcounts"); 970 goto fail; 971 } 972 } 973 974 /* Overlap check options */ 975 opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP); 976 opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE); 977 if (opt_overlap_check_template && opt_overlap_check && 978 strcmp(opt_overlap_check_template, opt_overlap_check)) 979 { 980 error_setg(errp, "Conflicting values for qcow2 options '" 981 QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE 982 "' ('%s')", opt_overlap_check, opt_overlap_check_template); 983 ret = -EINVAL; 984 goto fail; 985 } 986 if (!opt_overlap_check) { 987 opt_overlap_check = opt_overlap_check_template ?: "cached"; 988 } 989 990 if (!strcmp(opt_overlap_check, "none")) { 991 overlap_check_template = 0; 992 } else if (!strcmp(opt_overlap_check, "constant")) { 993 overlap_check_template = QCOW2_OL_CONSTANT; 994 } else if (!strcmp(opt_overlap_check, "cached")) { 995 overlap_check_template = QCOW2_OL_CACHED; 996 } else if (!strcmp(opt_overlap_check, "all")) { 997 overlap_check_template = QCOW2_OL_ALL; 998 } else { 999 error_setg(errp, "Unsupported value '%s' for qcow2 option " 1000 "'overlap-check'. Allowed are any of the following: " 1001 "none, constant, cached, all", opt_overlap_check); 1002 ret = -EINVAL; 1003 goto fail; 1004 } 1005 1006 r->overlap_check = 0; 1007 for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) { 1008 /* overlap-check defines a template bitmask, but every flag may be 1009 * overwritten through the associated boolean option */ 1010 r->overlap_check |= 1011 qemu_opt_get_bool(opts, overlap_bool_option_names[i], 1012 overlap_check_template & (1 << i)) << i; 1013 } 1014 1015 r->discard_passthrough[QCOW2_DISCARD_NEVER] = false; 1016 r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true; 1017 r->discard_passthrough[QCOW2_DISCARD_REQUEST] = 1018 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST, 1019 flags & BDRV_O_UNMAP); 1020 r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] = 1021 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true); 1022 r->discard_passthrough[QCOW2_DISCARD_OTHER] = 1023 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); 1024 1025 switch (s->crypt_method_header) { 1026 case QCOW_CRYPT_NONE: 1027 if (encryptfmt) { 1028 error_setg(errp, "No encryption in image header, but options " 1029 "specified format '%s'", encryptfmt); 1030 ret = -EINVAL; 1031 goto fail; 1032 } 1033 break; 1034 1035 case QCOW_CRYPT_AES: 1036 if (encryptfmt && !g_str_equal(encryptfmt, "aes")) { 1037 error_setg(errp, 1038 "Header reported 'aes' encryption format but " 1039 "options specify '%s'", encryptfmt); 1040 ret = -EINVAL; 1041 goto fail; 1042 } 1043 qdict_del(encryptopts, "format"); 1044 r->crypto_opts = block_crypto_open_opts_init( 1045 Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp); 1046 break; 1047 1048 case QCOW_CRYPT_LUKS: 1049 if (encryptfmt && !g_str_equal(encryptfmt, "luks")) { 1050 error_setg(errp, 1051 "Header reported 'luks' encryption format but " 1052 "options specify '%s'", encryptfmt); 1053 ret = -EINVAL; 1054 goto fail; 1055 } 1056 qdict_del(encryptopts, "format"); 1057 r->crypto_opts = block_crypto_open_opts_init( 1058 Q_CRYPTO_BLOCK_FORMAT_LUKS, encryptopts, errp); 1059 break; 1060 1061 default: 1062 error_setg(errp, "Unsupported encryption method %d", 1063 s->crypt_method_header); 1064 break; 1065 } 1066 if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) { 1067 ret = -EINVAL; 1068 goto fail; 1069 } 1070 1071 ret = 0; 1072 fail: 1073 qobject_unref(encryptopts); 1074 qemu_opts_del(opts); 1075 opts = NULL; 1076 return ret; 1077 } 1078 1079 static void qcow2_update_options_commit(BlockDriverState *bs, 1080 Qcow2ReopenState *r) 1081 { 1082 BDRVQcow2State *s = bs->opaque; 1083 int i; 1084 1085 if (s->l2_table_cache) { 1086 qcow2_cache_destroy(s->l2_table_cache); 1087 } 1088 if (s->refcount_block_cache) { 1089 qcow2_cache_destroy(s->refcount_block_cache); 1090 } 1091 s->l2_table_cache = r->l2_table_cache; 1092 s->refcount_block_cache = r->refcount_block_cache; 1093 s->l2_slice_size = r->l2_slice_size; 1094 1095 s->overlap_check = r->overlap_check; 1096 s->use_lazy_refcounts = r->use_lazy_refcounts; 1097 1098 for (i = 0; i < QCOW2_DISCARD_MAX; i++) { 1099 s->discard_passthrough[i] = r->discard_passthrough[i]; 1100 } 1101 1102 if (s->cache_clean_interval != r->cache_clean_interval) { 1103 cache_clean_timer_del(bs); 1104 s->cache_clean_interval = r->cache_clean_interval; 1105 cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); 1106 } 1107 1108 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); 1109 s->crypto_opts = r->crypto_opts; 1110 } 1111 1112 static void qcow2_update_options_abort(BlockDriverState *bs, 1113 Qcow2ReopenState *r) 1114 { 1115 if (r->l2_table_cache) { 1116 qcow2_cache_destroy(r->l2_table_cache); 1117 } 1118 if (r->refcount_block_cache) { 1119 qcow2_cache_destroy(r->refcount_block_cache); 1120 } 1121 qapi_free_QCryptoBlockOpenOptions(r->crypto_opts); 1122 } 1123 1124 static int qcow2_update_options(BlockDriverState *bs, QDict *options, 1125 int flags, Error **errp) 1126 { 1127 Qcow2ReopenState r = {}; 1128 int ret; 1129 1130 ret = qcow2_update_options_prepare(bs, &r, options, flags, errp); 1131 if (ret >= 0) { 1132 qcow2_update_options_commit(bs, &r); 1133 } else { 1134 qcow2_update_options_abort(bs, &r); 1135 } 1136 1137 return ret; 1138 } 1139 1140 /* Called with s->lock held. */ 1141 static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, 1142 int flags, Error **errp) 1143 { 1144 BDRVQcow2State *s = bs->opaque; 1145 unsigned int len, i; 1146 int ret = 0; 1147 QCowHeader header; 1148 Error *local_err = NULL; 1149 uint64_t ext_end; 1150 uint64_t l1_vm_state_index; 1151 bool update_header = false; 1152 bool header_updated = false; 1153 1154 ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); 1155 if (ret < 0) { 1156 error_setg_errno(errp, -ret, "Could not read qcow2 header"); 1157 goto fail; 1158 } 1159 be32_to_cpus(&header.magic); 1160 be32_to_cpus(&header.version); 1161 be64_to_cpus(&header.backing_file_offset); 1162 be32_to_cpus(&header.backing_file_size); 1163 be64_to_cpus(&header.size); 1164 be32_to_cpus(&header.cluster_bits); 1165 be32_to_cpus(&header.crypt_method); 1166 be64_to_cpus(&header.l1_table_offset); 1167 be32_to_cpus(&header.l1_size); 1168 be64_to_cpus(&header.refcount_table_offset); 1169 be32_to_cpus(&header.refcount_table_clusters); 1170 be64_to_cpus(&header.snapshots_offset); 1171 be32_to_cpus(&header.nb_snapshots); 1172 1173 if (header.magic != QCOW_MAGIC) { 1174 error_setg(errp, "Image is not in qcow2 format"); 1175 ret = -EINVAL; 1176 goto fail; 1177 } 1178 if (header.version < 2 || header.version > 3) { 1179 error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version); 1180 ret = -ENOTSUP; 1181 goto fail; 1182 } 1183 1184 s->qcow_version = header.version; 1185 1186 /* Initialise cluster size */ 1187 if (header.cluster_bits < MIN_CLUSTER_BITS || 1188 header.cluster_bits > MAX_CLUSTER_BITS) { 1189 error_setg(errp, "Unsupported cluster size: 2^%" PRIu32, 1190 header.cluster_bits); 1191 ret = -EINVAL; 1192 goto fail; 1193 } 1194 1195 s->cluster_bits = header.cluster_bits; 1196 s->cluster_size = 1 << s->cluster_bits; 1197 s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS); 1198 1199 /* Initialise version 3 header fields */ 1200 if (header.version == 2) { 1201 header.incompatible_features = 0; 1202 header.compatible_features = 0; 1203 header.autoclear_features = 0; 1204 header.refcount_order = 4; 1205 header.header_length = 72; 1206 } else { 1207 be64_to_cpus(&header.incompatible_features); 1208 be64_to_cpus(&header.compatible_features); 1209 be64_to_cpus(&header.autoclear_features); 1210 be32_to_cpus(&header.refcount_order); 1211 be32_to_cpus(&header.header_length); 1212 1213 if (header.header_length < 104) { 1214 error_setg(errp, "qcow2 header too short"); 1215 ret = -EINVAL; 1216 goto fail; 1217 } 1218 } 1219 1220 if (header.header_length > s->cluster_size) { 1221 error_setg(errp, "qcow2 header exceeds cluster size"); 1222 ret = -EINVAL; 1223 goto fail; 1224 } 1225 1226 if (header.header_length > sizeof(header)) { 1227 s->unknown_header_fields_size = header.header_length - sizeof(header); 1228 s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); 1229 ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields, 1230 s->unknown_header_fields_size); 1231 if (ret < 0) { 1232 error_setg_errno(errp, -ret, "Could not read unknown qcow2 header " 1233 "fields"); 1234 goto fail; 1235 } 1236 } 1237 1238 if (header.backing_file_offset > s->cluster_size) { 1239 error_setg(errp, "Invalid backing file offset"); 1240 ret = -EINVAL; 1241 goto fail; 1242 } 1243 1244 if (header.backing_file_offset) { 1245 ext_end = header.backing_file_offset; 1246 } else { 1247 ext_end = 1 << header.cluster_bits; 1248 } 1249 1250 /* Handle feature bits */ 1251 s->incompatible_features = header.incompatible_features; 1252 s->compatible_features = header.compatible_features; 1253 s->autoclear_features = header.autoclear_features; 1254 1255 if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) { 1256 void *feature_table = NULL; 1257 qcow2_read_extensions(bs, header.header_length, ext_end, 1258 &feature_table, flags, NULL, NULL); 1259 report_unsupported_feature(errp, feature_table, 1260 s->incompatible_features & 1261 ~QCOW2_INCOMPAT_MASK); 1262 ret = -ENOTSUP; 1263 g_free(feature_table); 1264 goto fail; 1265 } 1266 1267 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 1268 /* Corrupt images may not be written to unless they are being repaired 1269 */ 1270 if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { 1271 error_setg(errp, "qcow2: Image is corrupt; cannot be opened " 1272 "read/write"); 1273 ret = -EACCES; 1274 goto fail; 1275 } 1276 } 1277 1278 /* Check support for various header values */ 1279 if (header.refcount_order > 6) { 1280 error_setg(errp, "Reference count entry width too large; may not " 1281 "exceed 64 bits"); 1282 ret = -EINVAL; 1283 goto fail; 1284 } 1285 s->refcount_order = header.refcount_order; 1286 s->refcount_bits = 1 << s->refcount_order; 1287 s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); 1288 s->refcount_max += s->refcount_max - 1; 1289 1290 s->crypt_method_header = header.crypt_method; 1291 if (s->crypt_method_header) { 1292 if (bdrv_uses_whitelist() && 1293 s->crypt_method_header == QCOW_CRYPT_AES) { 1294 error_setg(errp, 1295 "Use of AES-CBC encrypted qcow2 images is no longer " 1296 "supported in system emulators"); 1297 error_append_hint(errp, 1298 "You can use 'qemu-img convert' to convert your " 1299 "image to an alternative supported format, such " 1300 "as unencrypted qcow2, or raw with the LUKS " 1301 "format instead.\n"); 1302 ret = -ENOSYS; 1303 goto fail; 1304 } 1305 1306 if (s->crypt_method_header == QCOW_CRYPT_AES) { 1307 s->crypt_physical_offset = false; 1308 } else { 1309 /* Assuming LUKS and any future crypt methods we 1310 * add will all use physical offsets, due to the 1311 * fact that the alternative is insecure... */ 1312 s->crypt_physical_offset = true; 1313 } 1314 1315 bs->encrypted = true; 1316 } 1317 1318 s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ 1319 s->l2_size = 1 << s->l2_bits; 1320 /* 2^(s->refcount_order - 3) is the refcount width in bytes */ 1321 s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3); 1322 s->refcount_block_size = 1 << s->refcount_block_bits; 1323 bs->total_sectors = header.size / 512; 1324 s->csize_shift = (62 - (s->cluster_bits - 8)); 1325 s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; 1326 s->cluster_offset_mask = (1LL << s->csize_shift) - 1; 1327 1328 s->refcount_table_offset = header.refcount_table_offset; 1329 s->refcount_table_size = 1330 header.refcount_table_clusters << (s->cluster_bits - 3); 1331 1332 if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) { 1333 error_setg(errp, "Image does not contain a reference count table"); 1334 ret = -EINVAL; 1335 goto fail; 1336 } 1337 1338 ret = qcow2_validate_table(bs, s->refcount_table_offset, 1339 header.refcount_table_clusters, 1340 s->cluster_size, QCOW_MAX_REFTABLE_SIZE, 1341 "Reference count table", errp); 1342 if (ret < 0) { 1343 goto fail; 1344 } 1345 1346 /* The total size in bytes of the snapshot table is checked in 1347 * qcow2_read_snapshots() because the size of each snapshot is 1348 * variable and we don't know it yet. 1349 * Here we only check the offset and number of snapshots. */ 1350 ret = qcow2_validate_table(bs, header.snapshots_offset, 1351 header.nb_snapshots, 1352 sizeof(QCowSnapshotHeader), 1353 sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS, 1354 "Snapshot table", errp); 1355 if (ret < 0) { 1356 goto fail; 1357 } 1358 1359 /* read the level 1 table */ 1360 ret = qcow2_validate_table(bs, header.l1_table_offset, 1361 header.l1_size, sizeof(uint64_t), 1362 QCOW_MAX_L1_SIZE, "Active L1 table", errp); 1363 if (ret < 0) { 1364 goto fail; 1365 } 1366 s->l1_size = header.l1_size; 1367 s->l1_table_offset = header.l1_table_offset; 1368 1369 l1_vm_state_index = size_to_l1(s, header.size); 1370 if (l1_vm_state_index > INT_MAX) { 1371 error_setg(errp, "Image is too big"); 1372 ret = -EFBIG; 1373 goto fail; 1374 } 1375 s->l1_vm_state_index = l1_vm_state_index; 1376 1377 /* the L1 table must contain at least enough entries to put 1378 header.size bytes */ 1379 if (s->l1_size < s->l1_vm_state_index) { 1380 error_setg(errp, "L1 table is too small"); 1381 ret = -EINVAL; 1382 goto fail; 1383 } 1384 1385 if (s->l1_size > 0) { 1386 s->l1_table = qemu_try_blockalign(bs->file->bs, 1387 ROUND_UP(s->l1_size * sizeof(uint64_t), 512)); 1388 if (s->l1_table == NULL) { 1389 error_setg(errp, "Could not allocate L1 table"); 1390 ret = -ENOMEM; 1391 goto fail; 1392 } 1393 ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, 1394 s->l1_size * sizeof(uint64_t)); 1395 if (ret < 0) { 1396 error_setg_errno(errp, -ret, "Could not read L1 table"); 1397 goto fail; 1398 } 1399 for(i = 0;i < s->l1_size; i++) { 1400 be64_to_cpus(&s->l1_table[i]); 1401 } 1402 } 1403 1404 /* Parse driver-specific options */ 1405 ret = qcow2_update_options(bs, options, flags, errp); 1406 if (ret < 0) { 1407 goto fail; 1408 } 1409 1410 s->cluster_cache_offset = -1; 1411 s->flags = flags; 1412 1413 ret = qcow2_refcount_init(bs); 1414 if (ret != 0) { 1415 error_setg_errno(errp, -ret, "Could not initialize refcount handling"); 1416 goto fail; 1417 } 1418 1419 QLIST_INIT(&s->cluster_allocs); 1420 QTAILQ_INIT(&s->discards); 1421 1422 /* read qcow2 extensions */ 1423 if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL, 1424 flags, &update_header, &local_err)) { 1425 error_propagate(errp, local_err); 1426 ret = -EINVAL; 1427 goto fail; 1428 } 1429 1430 /* qcow2_read_extension may have set up the crypto context 1431 * if the crypt method needs a header region, some methods 1432 * don't need header extensions, so must check here 1433 */ 1434 if (s->crypt_method_header && !s->crypto) { 1435 if (s->crypt_method_header == QCOW_CRYPT_AES) { 1436 unsigned int cflags = 0; 1437 if (flags & BDRV_O_NO_IO) { 1438 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; 1439 } 1440 s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", 1441 NULL, NULL, cflags, errp); 1442 if (!s->crypto) { 1443 ret = -EINVAL; 1444 goto fail; 1445 } 1446 } else if (!(flags & BDRV_O_NO_IO)) { 1447 error_setg(errp, "Missing CRYPTO header for crypt method %d", 1448 s->crypt_method_header); 1449 ret = -EINVAL; 1450 goto fail; 1451 } 1452 } 1453 1454 /* read the backing file name */ 1455 if (header.backing_file_offset != 0) { 1456 len = header.backing_file_size; 1457 if (len > MIN(1023, s->cluster_size - header.backing_file_offset) || 1458 len >= sizeof(bs->backing_file)) { 1459 error_setg(errp, "Backing file name too long"); 1460 ret = -EINVAL; 1461 goto fail; 1462 } 1463 ret = bdrv_pread(bs->file, header.backing_file_offset, 1464 bs->backing_file, len); 1465 if (ret < 0) { 1466 error_setg_errno(errp, -ret, "Could not read backing file name"); 1467 goto fail; 1468 } 1469 bs->backing_file[len] = '\0'; 1470 s->image_backing_file = g_strdup(bs->backing_file); 1471 } 1472 1473 /* Internal snapshots */ 1474 s->snapshots_offset = header.snapshots_offset; 1475 s->nb_snapshots = header.nb_snapshots; 1476 1477 ret = qcow2_read_snapshots(bs); 1478 if (ret < 0) { 1479 error_setg_errno(errp, -ret, "Could not read snapshots"); 1480 goto fail; 1481 } 1482 1483 /* Clear unknown autoclear feature bits */ 1484 update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; 1485 update_header = 1486 update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE); 1487 if (update_header) { 1488 s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; 1489 } 1490 1491 if (s->dirty_bitmaps_loaded) { 1492 /* It's some kind of reopen. There are no known cases where we need to 1493 * reload bitmaps in such a situation, so it's safer to skip them. 1494 * 1495 * Moreover, if we have some readonly bitmaps and we are reopening for 1496 * rw we should reopen bitmaps correspondingly. 1497 */ 1498 if (bdrv_has_readonly_bitmaps(bs) && 1499 !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) 1500 { 1501 qcow2_reopen_bitmaps_rw_hint(bs, &header_updated, &local_err); 1502 } 1503 } else { 1504 header_updated = qcow2_load_dirty_bitmaps(bs, &local_err); 1505 s->dirty_bitmaps_loaded = true; 1506 } 1507 update_header = update_header && !header_updated; 1508 if (local_err != NULL) { 1509 error_propagate(errp, local_err); 1510 ret = -EINVAL; 1511 goto fail; 1512 } 1513 1514 if (update_header) { 1515 ret = qcow2_update_header(bs); 1516 if (ret < 0) { 1517 error_setg_errno(errp, -ret, "Could not update qcow2 header"); 1518 goto fail; 1519 } 1520 } 1521 1522 bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP : 0; 1523 1524 /* Repair image if dirty */ 1525 if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && 1526 (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { 1527 BdrvCheckResult result = {0}; 1528 1529 ret = qcow2_co_check_locked(bs, &result, 1530 BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); 1531 if (ret < 0 || result.check_errors) { 1532 if (ret >= 0) { 1533 ret = -EIO; 1534 } 1535 error_setg_errno(errp, -ret, "Could not repair dirty image"); 1536 goto fail; 1537 } 1538 } 1539 1540 #ifdef DEBUG_ALLOC 1541 { 1542 BdrvCheckResult result = {0}; 1543 qcow2_check_refcounts(bs, &result, 0); 1544 } 1545 #endif 1546 return ret; 1547 1548 fail: 1549 g_free(s->unknown_header_fields); 1550 cleanup_unknown_header_ext(bs); 1551 qcow2_free_snapshots(bs); 1552 qcow2_refcount_close(bs); 1553 qemu_vfree(s->l1_table); 1554 /* else pre-write overlap checks in cache_destroy may crash */ 1555 s->l1_table = NULL; 1556 cache_clean_timer_del(bs); 1557 if (s->l2_table_cache) { 1558 qcow2_cache_destroy(s->l2_table_cache); 1559 } 1560 if (s->refcount_block_cache) { 1561 qcow2_cache_destroy(s->refcount_block_cache); 1562 } 1563 qcrypto_block_free(s->crypto); 1564 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); 1565 return ret; 1566 } 1567 1568 typedef struct QCow2OpenCo { 1569 BlockDriverState *bs; 1570 QDict *options; 1571 int flags; 1572 Error **errp; 1573 int ret; 1574 } QCow2OpenCo; 1575 1576 static void coroutine_fn qcow2_open_entry(void *opaque) 1577 { 1578 QCow2OpenCo *qoc = opaque; 1579 BDRVQcow2State *s = qoc->bs->opaque; 1580 1581 qemu_co_mutex_lock(&s->lock); 1582 qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp); 1583 qemu_co_mutex_unlock(&s->lock); 1584 } 1585 1586 static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, 1587 Error **errp) 1588 { 1589 BDRVQcow2State *s = bs->opaque; 1590 QCow2OpenCo qoc = { 1591 .bs = bs, 1592 .options = options, 1593 .flags = flags, 1594 .errp = errp, 1595 .ret = -EINPROGRESS 1596 }; 1597 1598 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 1599 false, errp); 1600 if (!bs->file) { 1601 return -EINVAL; 1602 } 1603 1604 /* Initialise locks */ 1605 qemu_co_mutex_init(&s->lock); 1606 1607 if (qemu_in_coroutine()) { 1608 /* From bdrv_co_create. */ 1609 qcow2_open_entry(&qoc); 1610 } else { 1611 qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc)); 1612 BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); 1613 } 1614 return qoc.ret; 1615 } 1616 1617 static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) 1618 { 1619 BDRVQcow2State *s = bs->opaque; 1620 1621 if (bs->encrypted) { 1622 /* Encryption works on a sector granularity */ 1623 bs->bl.request_alignment = BDRV_SECTOR_SIZE; 1624 } 1625 bs->bl.pwrite_zeroes_alignment = s->cluster_size; 1626 bs->bl.pdiscard_alignment = s->cluster_size; 1627 } 1628 1629 static int qcow2_reopen_prepare(BDRVReopenState *state, 1630 BlockReopenQueue *queue, Error **errp) 1631 { 1632 Qcow2ReopenState *r; 1633 int ret; 1634 1635 r = g_new0(Qcow2ReopenState, 1); 1636 state->opaque = r; 1637 1638 ret = qcow2_update_options_prepare(state->bs, r, state->options, 1639 state->flags, errp); 1640 if (ret < 0) { 1641 goto fail; 1642 } 1643 1644 /* We need to write out any unwritten data if we reopen read-only. */ 1645 if ((state->flags & BDRV_O_RDWR) == 0) { 1646 ret = qcow2_reopen_bitmaps_ro(state->bs, errp); 1647 if (ret < 0) { 1648 goto fail; 1649 } 1650 1651 ret = bdrv_flush(state->bs); 1652 if (ret < 0) { 1653 goto fail; 1654 } 1655 1656 ret = qcow2_mark_clean(state->bs); 1657 if (ret < 0) { 1658 goto fail; 1659 } 1660 } 1661 1662 return 0; 1663 1664 fail: 1665 qcow2_update_options_abort(state->bs, r); 1666 g_free(r); 1667 return ret; 1668 } 1669 1670 static void qcow2_reopen_commit(BDRVReopenState *state) 1671 { 1672 qcow2_update_options_commit(state->bs, state->opaque); 1673 g_free(state->opaque); 1674 } 1675 1676 static void qcow2_reopen_abort(BDRVReopenState *state) 1677 { 1678 qcow2_update_options_abort(state->bs, state->opaque); 1679 g_free(state->opaque); 1680 } 1681 1682 static void qcow2_join_options(QDict *options, QDict *old_options) 1683 { 1684 bool has_new_overlap_template = 1685 qdict_haskey(options, QCOW2_OPT_OVERLAP) || 1686 qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE); 1687 bool has_new_total_cache_size = 1688 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE); 1689 bool has_all_cache_options; 1690 1691 /* New overlap template overrides all old overlap options */ 1692 if (has_new_overlap_template) { 1693 qdict_del(old_options, QCOW2_OPT_OVERLAP); 1694 qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE); 1695 qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER); 1696 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1); 1697 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2); 1698 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE); 1699 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK); 1700 qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE); 1701 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1); 1702 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2); 1703 } 1704 1705 /* New total cache size overrides all old options */ 1706 if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) { 1707 qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE); 1708 qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 1709 } 1710 1711 qdict_join(options, old_options, false); 1712 1713 /* 1714 * If after merging all cache size options are set, an old total size is 1715 * overwritten. Do keep all options, however, if all three are new. The 1716 * resulting error message is what we want to happen. 1717 */ 1718 has_all_cache_options = 1719 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) || 1720 qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) || 1721 qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 1722 1723 if (has_all_cache_options && !has_new_total_cache_size) { 1724 qdict_del(options, QCOW2_OPT_CACHE_SIZE); 1725 } 1726 } 1727 1728 static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, 1729 bool want_zero, 1730 int64_t offset, int64_t count, 1731 int64_t *pnum, int64_t *map, 1732 BlockDriverState **file) 1733 { 1734 BDRVQcow2State *s = bs->opaque; 1735 uint64_t cluster_offset; 1736 int index_in_cluster, ret; 1737 unsigned int bytes; 1738 int status = 0; 1739 1740 bytes = MIN(INT_MAX, count); 1741 qemu_co_mutex_lock(&s->lock); 1742 ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); 1743 qemu_co_mutex_unlock(&s->lock); 1744 if (ret < 0) { 1745 return ret; 1746 } 1747 1748 *pnum = bytes; 1749 1750 if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED && 1751 !s->crypto) { 1752 index_in_cluster = offset & (s->cluster_size - 1); 1753 *map = cluster_offset | index_in_cluster; 1754 *file = bs->file->bs; 1755 status |= BDRV_BLOCK_OFFSET_VALID; 1756 } 1757 if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) { 1758 status |= BDRV_BLOCK_ZERO; 1759 } else if (ret != QCOW2_CLUSTER_UNALLOCATED) { 1760 status |= BDRV_BLOCK_DATA; 1761 } 1762 return status; 1763 } 1764 1765 static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, 1766 uint64_t bytes, QEMUIOVector *qiov, 1767 int flags) 1768 { 1769 BDRVQcow2State *s = bs->opaque; 1770 int offset_in_cluster; 1771 int ret; 1772 unsigned int cur_bytes; /* number of bytes in current iteration */ 1773 uint64_t cluster_offset = 0; 1774 uint64_t bytes_done = 0; 1775 QEMUIOVector hd_qiov; 1776 uint8_t *cluster_data = NULL; 1777 1778 qemu_iovec_init(&hd_qiov, qiov->niov); 1779 1780 qemu_co_mutex_lock(&s->lock); 1781 1782 while (bytes != 0) { 1783 1784 /* prepare next request */ 1785 cur_bytes = MIN(bytes, INT_MAX); 1786 if (s->crypto) { 1787 cur_bytes = MIN(cur_bytes, 1788 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 1789 } 1790 1791 ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset); 1792 if (ret < 0) { 1793 goto fail; 1794 } 1795 1796 offset_in_cluster = offset_into_cluster(s, offset); 1797 1798 qemu_iovec_reset(&hd_qiov); 1799 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); 1800 1801 switch (ret) { 1802 case QCOW2_CLUSTER_UNALLOCATED: 1803 1804 if (bs->backing) { 1805 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); 1806 qemu_co_mutex_unlock(&s->lock); 1807 ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, 1808 &hd_qiov, 0); 1809 qemu_co_mutex_lock(&s->lock); 1810 if (ret < 0) { 1811 goto fail; 1812 } 1813 } else { 1814 /* Note: in this case, no need to wait */ 1815 qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); 1816 } 1817 break; 1818 1819 case QCOW2_CLUSTER_ZERO_PLAIN: 1820 case QCOW2_CLUSTER_ZERO_ALLOC: 1821 qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); 1822 break; 1823 1824 case QCOW2_CLUSTER_COMPRESSED: 1825 /* add AIO support for compressed blocks ? */ 1826 ret = qcow2_decompress_cluster(bs, cluster_offset); 1827 if (ret < 0) { 1828 goto fail; 1829 } 1830 1831 qemu_iovec_from_buf(&hd_qiov, 0, 1832 s->cluster_cache + offset_in_cluster, 1833 cur_bytes); 1834 break; 1835 1836 case QCOW2_CLUSTER_NORMAL: 1837 if ((cluster_offset & 511) != 0) { 1838 ret = -EIO; 1839 goto fail; 1840 } 1841 1842 if (bs->encrypted) { 1843 assert(s->crypto); 1844 1845 /* 1846 * For encrypted images, read everything into a temporary 1847 * contiguous buffer on which the AES functions can work. 1848 */ 1849 if (!cluster_data) { 1850 cluster_data = 1851 qemu_try_blockalign(bs->file->bs, 1852 QCOW_MAX_CRYPT_CLUSTERS 1853 * s->cluster_size); 1854 if (cluster_data == NULL) { 1855 ret = -ENOMEM; 1856 goto fail; 1857 } 1858 } 1859 1860 assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 1861 qemu_iovec_reset(&hd_qiov); 1862 qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); 1863 } 1864 1865 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); 1866 qemu_co_mutex_unlock(&s->lock); 1867 ret = bdrv_co_preadv(bs->file, 1868 cluster_offset + offset_in_cluster, 1869 cur_bytes, &hd_qiov, 0); 1870 qemu_co_mutex_lock(&s->lock); 1871 if (ret < 0) { 1872 goto fail; 1873 } 1874 if (bs->encrypted) { 1875 assert(s->crypto); 1876 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); 1877 assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0); 1878 if (qcrypto_block_decrypt(s->crypto, 1879 (s->crypt_physical_offset ? 1880 cluster_offset + offset_in_cluster : 1881 offset), 1882 cluster_data, 1883 cur_bytes, 1884 NULL) < 0) { 1885 ret = -EIO; 1886 goto fail; 1887 } 1888 qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes); 1889 } 1890 break; 1891 1892 default: 1893 g_assert_not_reached(); 1894 ret = -EIO; 1895 goto fail; 1896 } 1897 1898 bytes -= cur_bytes; 1899 offset += cur_bytes; 1900 bytes_done += cur_bytes; 1901 } 1902 ret = 0; 1903 1904 fail: 1905 qemu_co_mutex_unlock(&s->lock); 1906 1907 qemu_iovec_destroy(&hd_qiov); 1908 qemu_vfree(cluster_data); 1909 1910 return ret; 1911 } 1912 1913 /* Check if it's possible to merge a write request with the writing of 1914 * the data from the COW regions */ 1915 static bool merge_cow(uint64_t offset, unsigned bytes, 1916 QEMUIOVector *hd_qiov, QCowL2Meta *l2meta) 1917 { 1918 QCowL2Meta *m; 1919 1920 for (m = l2meta; m != NULL; m = m->next) { 1921 /* If both COW regions are empty then there's nothing to merge */ 1922 if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) { 1923 continue; 1924 } 1925 1926 /* The data (middle) region must be immediately after the 1927 * start region */ 1928 if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) { 1929 continue; 1930 } 1931 1932 /* The end region must be immediately after the data (middle) 1933 * region */ 1934 if (m->offset + m->cow_end.offset != offset + bytes) { 1935 continue; 1936 } 1937 1938 /* Make sure that adding both COW regions to the QEMUIOVector 1939 * does not exceed IOV_MAX */ 1940 if (hd_qiov->niov > IOV_MAX - 2) { 1941 continue; 1942 } 1943 1944 m->data_qiov = hd_qiov; 1945 return true; 1946 } 1947 1948 return false; 1949 } 1950 1951 static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, 1952 uint64_t bytes, QEMUIOVector *qiov, 1953 int flags) 1954 { 1955 BDRVQcow2State *s = bs->opaque; 1956 int offset_in_cluster; 1957 int ret; 1958 unsigned int cur_bytes; /* number of sectors in current iteration */ 1959 uint64_t cluster_offset; 1960 QEMUIOVector hd_qiov; 1961 uint64_t bytes_done = 0; 1962 uint8_t *cluster_data = NULL; 1963 QCowL2Meta *l2meta = NULL; 1964 1965 trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes); 1966 1967 qemu_iovec_init(&hd_qiov, qiov->niov); 1968 1969 s->cluster_cache_offset = -1; /* disable compressed cache */ 1970 1971 qemu_co_mutex_lock(&s->lock); 1972 1973 while (bytes != 0) { 1974 1975 l2meta = NULL; 1976 1977 trace_qcow2_writev_start_part(qemu_coroutine_self()); 1978 offset_in_cluster = offset_into_cluster(s, offset); 1979 cur_bytes = MIN(bytes, INT_MAX); 1980 if (bs->encrypted) { 1981 cur_bytes = MIN(cur_bytes, 1982 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size 1983 - offset_in_cluster); 1984 } 1985 1986 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, 1987 &cluster_offset, &l2meta); 1988 if (ret < 0) { 1989 goto fail; 1990 } 1991 1992 assert((cluster_offset & 511) == 0); 1993 1994 qemu_iovec_reset(&hd_qiov); 1995 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); 1996 1997 if (bs->encrypted) { 1998 assert(s->crypto); 1999 if (!cluster_data) { 2000 cluster_data = qemu_try_blockalign(bs->file->bs, 2001 QCOW_MAX_CRYPT_CLUSTERS 2002 * s->cluster_size); 2003 if (cluster_data == NULL) { 2004 ret = -ENOMEM; 2005 goto fail; 2006 } 2007 } 2008 2009 assert(hd_qiov.size <= 2010 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 2011 qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); 2012 2013 if (qcrypto_block_encrypt(s->crypto, 2014 (s->crypt_physical_offset ? 2015 cluster_offset + offset_in_cluster : 2016 offset), 2017 cluster_data, 2018 cur_bytes, NULL) < 0) { 2019 ret = -EIO; 2020 goto fail; 2021 } 2022 2023 qemu_iovec_reset(&hd_qiov); 2024 qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); 2025 } 2026 2027 ret = qcow2_pre_write_overlap_check(bs, 0, 2028 cluster_offset + offset_in_cluster, cur_bytes); 2029 if (ret < 0) { 2030 goto fail; 2031 } 2032 2033 /* If we need to do COW, check if it's possible to merge the 2034 * writing of the guest data together with that of the COW regions. 2035 * If it's not possible (or not necessary) then write the 2036 * guest data now. */ 2037 if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) { 2038 qemu_co_mutex_unlock(&s->lock); 2039 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); 2040 trace_qcow2_writev_data(qemu_coroutine_self(), 2041 cluster_offset + offset_in_cluster); 2042 ret = bdrv_co_pwritev(bs->file, 2043 cluster_offset + offset_in_cluster, 2044 cur_bytes, &hd_qiov, 0); 2045 qemu_co_mutex_lock(&s->lock); 2046 if (ret < 0) { 2047 goto fail; 2048 } 2049 } 2050 2051 while (l2meta != NULL) { 2052 QCowL2Meta *next; 2053 2054 ret = qcow2_alloc_cluster_link_l2(bs, l2meta); 2055 if (ret < 0) { 2056 goto fail; 2057 } 2058 2059 /* Take the request off the list of running requests */ 2060 if (l2meta->nb_clusters != 0) { 2061 QLIST_REMOVE(l2meta, next_in_flight); 2062 } 2063 2064 qemu_co_queue_restart_all(&l2meta->dependent_requests); 2065 2066 next = l2meta->next; 2067 g_free(l2meta); 2068 l2meta = next; 2069 } 2070 2071 bytes -= cur_bytes; 2072 offset += cur_bytes; 2073 bytes_done += cur_bytes; 2074 trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes); 2075 } 2076 ret = 0; 2077 2078 fail: 2079 while (l2meta != NULL) { 2080 QCowL2Meta *next; 2081 2082 if (l2meta->nb_clusters != 0) { 2083 QLIST_REMOVE(l2meta, next_in_flight); 2084 } 2085 qemu_co_queue_restart_all(&l2meta->dependent_requests); 2086 2087 next = l2meta->next; 2088 g_free(l2meta); 2089 l2meta = next; 2090 } 2091 2092 qemu_co_mutex_unlock(&s->lock); 2093 2094 qemu_iovec_destroy(&hd_qiov); 2095 qemu_vfree(cluster_data); 2096 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); 2097 2098 return ret; 2099 } 2100 2101 static int qcow2_inactivate(BlockDriverState *bs) 2102 { 2103 BDRVQcow2State *s = bs->opaque; 2104 int ret, result = 0; 2105 Error *local_err = NULL; 2106 2107 qcow2_store_persistent_dirty_bitmaps(bs, &local_err); 2108 if (local_err != NULL) { 2109 result = -EINVAL; 2110 error_report_err(local_err); 2111 error_report("Persistent bitmaps are lost for node '%s'", 2112 bdrv_get_device_or_node_name(bs)); 2113 } 2114 2115 ret = qcow2_cache_flush(bs, s->l2_table_cache); 2116 if (ret) { 2117 result = ret; 2118 error_report("Failed to flush the L2 table cache: %s", 2119 strerror(-ret)); 2120 } 2121 2122 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 2123 if (ret) { 2124 result = ret; 2125 error_report("Failed to flush the refcount block cache: %s", 2126 strerror(-ret)); 2127 } 2128 2129 if (result == 0) { 2130 qcow2_mark_clean(bs); 2131 } 2132 2133 return result; 2134 } 2135 2136 static void qcow2_close(BlockDriverState *bs) 2137 { 2138 BDRVQcow2State *s = bs->opaque; 2139 qemu_vfree(s->l1_table); 2140 /* else pre-write overlap checks in cache_destroy may crash */ 2141 s->l1_table = NULL; 2142 2143 if (!(s->flags & BDRV_O_INACTIVE)) { 2144 qcow2_inactivate(bs); 2145 } 2146 2147 cache_clean_timer_del(bs); 2148 qcow2_cache_destroy(s->l2_table_cache); 2149 qcow2_cache_destroy(s->refcount_block_cache); 2150 2151 qcrypto_block_free(s->crypto); 2152 s->crypto = NULL; 2153 2154 g_free(s->unknown_header_fields); 2155 cleanup_unknown_header_ext(bs); 2156 2157 g_free(s->image_backing_file); 2158 g_free(s->image_backing_format); 2159 2160 g_free(s->cluster_cache); 2161 qemu_vfree(s->cluster_data); 2162 qcow2_refcount_close(bs); 2163 qcow2_free_snapshots(bs); 2164 } 2165 2166 static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs, 2167 Error **errp) 2168 { 2169 BDRVQcow2State *s = bs->opaque; 2170 int flags = s->flags; 2171 QCryptoBlock *crypto = NULL; 2172 QDict *options; 2173 Error *local_err = NULL; 2174 int ret; 2175 2176 /* 2177 * Backing files are read-only which makes all of their metadata immutable, 2178 * that means we don't have to worry about reopening them here. 2179 */ 2180 2181 crypto = s->crypto; 2182 s->crypto = NULL; 2183 2184 qcow2_close(bs); 2185 2186 memset(s, 0, sizeof(BDRVQcow2State)); 2187 options = qdict_clone_shallow(bs->options); 2188 2189 flags &= ~BDRV_O_INACTIVE; 2190 qemu_co_mutex_lock(&s->lock); 2191 ret = qcow2_do_open(bs, options, flags, &local_err); 2192 qemu_co_mutex_unlock(&s->lock); 2193 qobject_unref(options); 2194 if (local_err) { 2195 error_propagate(errp, local_err); 2196 error_prepend(errp, "Could not reopen qcow2 layer: "); 2197 bs->drv = NULL; 2198 return; 2199 } else if (ret < 0) { 2200 error_setg_errno(errp, -ret, "Could not reopen qcow2 layer"); 2201 bs->drv = NULL; 2202 return; 2203 } 2204 2205 s->crypto = crypto; 2206 } 2207 2208 static size_t header_ext_add(char *buf, uint32_t magic, const void *s, 2209 size_t len, size_t buflen) 2210 { 2211 QCowExtension *ext_backing_fmt = (QCowExtension*) buf; 2212 size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7); 2213 2214 if (buflen < ext_len) { 2215 return -ENOSPC; 2216 } 2217 2218 *ext_backing_fmt = (QCowExtension) { 2219 .magic = cpu_to_be32(magic), 2220 .len = cpu_to_be32(len), 2221 }; 2222 2223 if (len) { 2224 memcpy(buf + sizeof(QCowExtension), s, len); 2225 } 2226 2227 return ext_len; 2228 } 2229 2230 /* 2231 * Updates the qcow2 header, including the variable length parts of it, i.e. 2232 * the backing file name and all extensions. qcow2 was not designed to allow 2233 * such changes, so if we run out of space (we can only use the first cluster) 2234 * this function may fail. 2235 * 2236 * Returns 0 on success, -errno in error cases. 2237 */ 2238 int qcow2_update_header(BlockDriverState *bs) 2239 { 2240 BDRVQcow2State *s = bs->opaque; 2241 QCowHeader *header; 2242 char *buf; 2243 size_t buflen = s->cluster_size; 2244 int ret; 2245 uint64_t total_size; 2246 uint32_t refcount_table_clusters; 2247 size_t header_length; 2248 Qcow2UnknownHeaderExtension *uext; 2249 2250 buf = qemu_blockalign(bs, buflen); 2251 2252 /* Header structure */ 2253 header = (QCowHeader*) buf; 2254 2255 if (buflen < sizeof(*header)) { 2256 ret = -ENOSPC; 2257 goto fail; 2258 } 2259 2260 header_length = sizeof(*header) + s->unknown_header_fields_size; 2261 total_size = bs->total_sectors * BDRV_SECTOR_SIZE; 2262 refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); 2263 2264 *header = (QCowHeader) { 2265 /* Version 2 fields */ 2266 .magic = cpu_to_be32(QCOW_MAGIC), 2267 .version = cpu_to_be32(s->qcow_version), 2268 .backing_file_offset = 0, 2269 .backing_file_size = 0, 2270 .cluster_bits = cpu_to_be32(s->cluster_bits), 2271 .size = cpu_to_be64(total_size), 2272 .crypt_method = cpu_to_be32(s->crypt_method_header), 2273 .l1_size = cpu_to_be32(s->l1_size), 2274 .l1_table_offset = cpu_to_be64(s->l1_table_offset), 2275 .refcount_table_offset = cpu_to_be64(s->refcount_table_offset), 2276 .refcount_table_clusters = cpu_to_be32(refcount_table_clusters), 2277 .nb_snapshots = cpu_to_be32(s->nb_snapshots), 2278 .snapshots_offset = cpu_to_be64(s->snapshots_offset), 2279 2280 /* Version 3 fields */ 2281 .incompatible_features = cpu_to_be64(s->incompatible_features), 2282 .compatible_features = cpu_to_be64(s->compatible_features), 2283 .autoclear_features = cpu_to_be64(s->autoclear_features), 2284 .refcount_order = cpu_to_be32(s->refcount_order), 2285 .header_length = cpu_to_be32(header_length), 2286 }; 2287 2288 /* For older versions, write a shorter header */ 2289 switch (s->qcow_version) { 2290 case 2: 2291 ret = offsetof(QCowHeader, incompatible_features); 2292 break; 2293 case 3: 2294 ret = sizeof(*header); 2295 break; 2296 default: 2297 ret = -EINVAL; 2298 goto fail; 2299 } 2300 2301 buf += ret; 2302 buflen -= ret; 2303 memset(buf, 0, buflen); 2304 2305 /* Preserve any unknown field in the header */ 2306 if (s->unknown_header_fields_size) { 2307 if (buflen < s->unknown_header_fields_size) { 2308 ret = -ENOSPC; 2309 goto fail; 2310 } 2311 2312 memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size); 2313 buf += s->unknown_header_fields_size; 2314 buflen -= s->unknown_header_fields_size; 2315 } 2316 2317 /* Backing file format header extension */ 2318 if (s->image_backing_format) { 2319 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, 2320 s->image_backing_format, 2321 strlen(s->image_backing_format), 2322 buflen); 2323 if (ret < 0) { 2324 goto fail; 2325 } 2326 2327 buf += ret; 2328 buflen -= ret; 2329 } 2330 2331 /* Full disk encryption header pointer extension */ 2332 if (s->crypto_header.offset != 0) { 2333 cpu_to_be64s(&s->crypto_header.offset); 2334 cpu_to_be64s(&s->crypto_header.length); 2335 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER, 2336 &s->crypto_header, sizeof(s->crypto_header), 2337 buflen); 2338 be64_to_cpus(&s->crypto_header.offset); 2339 be64_to_cpus(&s->crypto_header.length); 2340 if (ret < 0) { 2341 goto fail; 2342 } 2343 buf += ret; 2344 buflen -= ret; 2345 } 2346 2347 /* Feature table */ 2348 if (s->qcow_version >= 3) { 2349 Qcow2Feature features[] = { 2350 { 2351 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 2352 .bit = QCOW2_INCOMPAT_DIRTY_BITNR, 2353 .name = "dirty bit", 2354 }, 2355 { 2356 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 2357 .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, 2358 .name = "corrupt bit", 2359 }, 2360 { 2361 .type = QCOW2_FEAT_TYPE_COMPATIBLE, 2362 .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, 2363 .name = "lazy refcounts", 2364 }, 2365 }; 2366 2367 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, 2368 features, sizeof(features), buflen); 2369 if (ret < 0) { 2370 goto fail; 2371 } 2372 buf += ret; 2373 buflen -= ret; 2374 } 2375 2376 /* Bitmap extension */ 2377 if (s->nb_bitmaps > 0) { 2378 Qcow2BitmapHeaderExt bitmaps_header = { 2379 .nb_bitmaps = cpu_to_be32(s->nb_bitmaps), 2380 .bitmap_directory_size = 2381 cpu_to_be64(s->bitmap_directory_size), 2382 .bitmap_directory_offset = 2383 cpu_to_be64(s->bitmap_directory_offset) 2384 }; 2385 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS, 2386 &bitmaps_header, sizeof(bitmaps_header), 2387 buflen); 2388 if (ret < 0) { 2389 goto fail; 2390 } 2391 buf += ret; 2392 buflen -= ret; 2393 } 2394 2395 /* Keep unknown header extensions */ 2396 QLIST_FOREACH(uext, &s->unknown_header_ext, next) { 2397 ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); 2398 if (ret < 0) { 2399 goto fail; 2400 } 2401 2402 buf += ret; 2403 buflen -= ret; 2404 } 2405 2406 /* End of header extensions */ 2407 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen); 2408 if (ret < 0) { 2409 goto fail; 2410 } 2411 2412 buf += ret; 2413 buflen -= ret; 2414 2415 /* Backing file name */ 2416 if (s->image_backing_file) { 2417 size_t backing_file_len = strlen(s->image_backing_file); 2418 2419 if (buflen < backing_file_len) { 2420 ret = -ENOSPC; 2421 goto fail; 2422 } 2423 2424 /* Using strncpy is ok here, since buf is not NUL-terminated. */ 2425 strncpy(buf, s->image_backing_file, buflen); 2426 2427 header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); 2428 header->backing_file_size = cpu_to_be32(backing_file_len); 2429 } 2430 2431 /* Write the new header */ 2432 ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size); 2433 if (ret < 0) { 2434 goto fail; 2435 } 2436 2437 ret = 0; 2438 fail: 2439 qemu_vfree(header); 2440 return ret; 2441 } 2442 2443 static int qcow2_change_backing_file(BlockDriverState *bs, 2444 const char *backing_file, const char *backing_fmt) 2445 { 2446 BDRVQcow2State *s = bs->opaque; 2447 2448 if (backing_file && strlen(backing_file) > 1023) { 2449 return -EINVAL; 2450 } 2451 2452 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2453 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2454 2455 g_free(s->image_backing_file); 2456 g_free(s->image_backing_format); 2457 2458 s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL; 2459 s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL; 2460 2461 return qcow2_update_header(bs); 2462 } 2463 2464 static int qcow2_crypt_method_from_format(const char *encryptfmt) 2465 { 2466 if (g_str_equal(encryptfmt, "luks")) { 2467 return QCOW_CRYPT_LUKS; 2468 } else if (g_str_equal(encryptfmt, "aes")) { 2469 return QCOW_CRYPT_AES; 2470 } else { 2471 return -EINVAL; 2472 } 2473 } 2474 2475 static int qcow2_set_up_encryption(BlockDriverState *bs, 2476 QCryptoBlockCreateOptions *cryptoopts, 2477 Error **errp) 2478 { 2479 BDRVQcow2State *s = bs->opaque; 2480 QCryptoBlock *crypto = NULL; 2481 int fmt, ret; 2482 2483 switch (cryptoopts->format) { 2484 case Q_CRYPTO_BLOCK_FORMAT_LUKS: 2485 fmt = QCOW_CRYPT_LUKS; 2486 break; 2487 case Q_CRYPTO_BLOCK_FORMAT_QCOW: 2488 fmt = QCOW_CRYPT_AES; 2489 break; 2490 default: 2491 error_setg(errp, "Crypto format not supported in qcow2"); 2492 return -EINVAL; 2493 } 2494 2495 s->crypt_method_header = fmt; 2496 2497 crypto = qcrypto_block_create(cryptoopts, "encrypt.", 2498 qcow2_crypto_hdr_init_func, 2499 qcow2_crypto_hdr_write_func, 2500 bs, errp); 2501 if (!crypto) { 2502 return -EINVAL; 2503 } 2504 2505 ret = qcow2_update_header(bs); 2506 if (ret < 0) { 2507 error_setg_errno(errp, -ret, "Could not write encryption header"); 2508 goto out; 2509 } 2510 2511 ret = 0; 2512 out: 2513 qcrypto_block_free(crypto); 2514 return ret; 2515 } 2516 2517 2518 typedef struct PreallocCo { 2519 BlockDriverState *bs; 2520 uint64_t offset; 2521 uint64_t new_length; 2522 2523 int ret; 2524 } PreallocCo; 2525 2526 /** 2527 * Preallocates metadata structures for data clusters between @offset (in the 2528 * guest disk) and @new_length (which is thus generally the new guest disk 2529 * size). 2530 * 2531 * Returns: 0 on success, -errno on failure. 2532 */ 2533 static void coroutine_fn preallocate_co(void *opaque) 2534 { 2535 PreallocCo *params = opaque; 2536 BlockDriverState *bs = params->bs; 2537 uint64_t offset = params->offset; 2538 uint64_t new_length = params->new_length; 2539 BDRVQcow2State *s = bs->opaque; 2540 uint64_t bytes; 2541 uint64_t host_offset = 0; 2542 unsigned int cur_bytes; 2543 int ret; 2544 QCowL2Meta *meta; 2545 2546 qemu_co_mutex_lock(&s->lock); 2547 2548 assert(offset <= new_length); 2549 bytes = new_length - offset; 2550 2551 while (bytes) { 2552 cur_bytes = MIN(bytes, INT_MAX); 2553 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, 2554 &host_offset, &meta); 2555 if (ret < 0) { 2556 goto done; 2557 } 2558 2559 while (meta) { 2560 QCowL2Meta *next = meta->next; 2561 2562 ret = qcow2_alloc_cluster_link_l2(bs, meta); 2563 if (ret < 0) { 2564 qcow2_free_any_clusters(bs, meta->alloc_offset, 2565 meta->nb_clusters, QCOW2_DISCARD_NEVER); 2566 goto done; 2567 } 2568 2569 /* There are no dependent requests, but we need to remove our 2570 * request from the list of in-flight requests */ 2571 QLIST_REMOVE(meta, next_in_flight); 2572 2573 g_free(meta); 2574 meta = next; 2575 } 2576 2577 /* TODO Preallocate data if requested */ 2578 2579 bytes -= cur_bytes; 2580 offset += cur_bytes; 2581 } 2582 2583 /* 2584 * It is expected that the image file is large enough to actually contain 2585 * all of the allocated clusters (otherwise we get failing reads after 2586 * EOF). Extend the image to the last allocated sector. 2587 */ 2588 if (host_offset != 0) { 2589 uint8_t data = 0; 2590 ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1, 2591 &data, 1); 2592 if (ret < 0) { 2593 goto done; 2594 } 2595 } 2596 2597 ret = 0; 2598 2599 done: 2600 qemu_co_mutex_unlock(&s->lock); 2601 params->ret = ret; 2602 } 2603 2604 static int preallocate(BlockDriverState *bs, 2605 uint64_t offset, uint64_t new_length) 2606 { 2607 PreallocCo params = { 2608 .bs = bs, 2609 .offset = offset, 2610 .new_length = new_length, 2611 .ret = -EINPROGRESS, 2612 }; 2613 2614 if (qemu_in_coroutine()) { 2615 preallocate_co(¶ms); 2616 } else { 2617 Coroutine *co = qemu_coroutine_create(preallocate_co, ¶ms); 2618 bdrv_coroutine_enter(bs, co); 2619 BDRV_POLL_WHILE(bs, params.ret == -EINPROGRESS); 2620 } 2621 return params.ret; 2622 } 2623 2624 /* qcow2_refcount_metadata_size: 2625 * @clusters: number of clusters to refcount (including data and L1/L2 tables) 2626 * @cluster_size: size of a cluster, in bytes 2627 * @refcount_order: refcount bits power-of-2 exponent 2628 * @generous_increase: allow for the refcount table to be 1.5x as large as it 2629 * needs to be 2630 * 2631 * Returns: Number of bytes required for refcount blocks and table metadata. 2632 */ 2633 int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, 2634 int refcount_order, bool generous_increase, 2635 uint64_t *refblock_count) 2636 { 2637 /* 2638 * Every host cluster is reference-counted, including metadata (even 2639 * refcount metadata is recursively included). 2640 * 2641 * An accurate formula for the size of refcount metadata size is difficult 2642 * to derive. An easier method of calculation is finding the fixed point 2643 * where no further refcount blocks or table clusters are required to 2644 * reference count every cluster. 2645 */ 2646 int64_t blocks_per_table_cluster = cluster_size / sizeof(uint64_t); 2647 int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order); 2648 int64_t table = 0; /* number of refcount table clusters */ 2649 int64_t blocks = 0; /* number of refcount block clusters */ 2650 int64_t last; 2651 int64_t n = 0; 2652 2653 do { 2654 last = n; 2655 blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block); 2656 table = DIV_ROUND_UP(blocks, blocks_per_table_cluster); 2657 n = clusters + blocks + table; 2658 2659 if (n == last && generous_increase) { 2660 clusters += DIV_ROUND_UP(table, 2); 2661 n = 0; /* force another loop */ 2662 generous_increase = false; 2663 } 2664 } while (n != last); 2665 2666 if (refblock_count) { 2667 *refblock_count = blocks; 2668 } 2669 2670 return (blocks + table) * cluster_size; 2671 } 2672 2673 /** 2674 * qcow2_calc_prealloc_size: 2675 * @total_size: virtual disk size in bytes 2676 * @cluster_size: cluster size in bytes 2677 * @refcount_order: refcount bits power-of-2 exponent 2678 * 2679 * Returns: Total number of bytes required for the fully allocated image 2680 * (including metadata). 2681 */ 2682 static int64_t qcow2_calc_prealloc_size(int64_t total_size, 2683 size_t cluster_size, 2684 int refcount_order) 2685 { 2686 int64_t meta_size = 0; 2687 uint64_t nl1e, nl2e; 2688 int64_t aligned_total_size = ROUND_UP(total_size, cluster_size); 2689 2690 /* header: 1 cluster */ 2691 meta_size += cluster_size; 2692 2693 /* total size of L2 tables */ 2694 nl2e = aligned_total_size / cluster_size; 2695 nl2e = ROUND_UP(nl2e, cluster_size / sizeof(uint64_t)); 2696 meta_size += nl2e * sizeof(uint64_t); 2697 2698 /* total size of L1 tables */ 2699 nl1e = nl2e * sizeof(uint64_t) / cluster_size; 2700 nl1e = ROUND_UP(nl1e, cluster_size / sizeof(uint64_t)); 2701 meta_size += nl1e * sizeof(uint64_t); 2702 2703 /* total size of refcount table and blocks */ 2704 meta_size += qcow2_refcount_metadata_size( 2705 (meta_size + aligned_total_size) / cluster_size, 2706 cluster_size, refcount_order, false, NULL); 2707 2708 return meta_size + aligned_total_size; 2709 } 2710 2711 static bool validate_cluster_size(size_t cluster_size, Error **errp) 2712 { 2713 int cluster_bits = ctz32(cluster_size); 2714 if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || 2715 (1 << cluster_bits) != cluster_size) 2716 { 2717 error_setg(errp, "Cluster size must be a power of two between %d and " 2718 "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); 2719 return false; 2720 } 2721 return true; 2722 } 2723 2724 static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp) 2725 { 2726 size_t cluster_size; 2727 2728 cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 2729 DEFAULT_CLUSTER_SIZE); 2730 if (!validate_cluster_size(cluster_size, errp)) { 2731 return 0; 2732 } 2733 return cluster_size; 2734 } 2735 2736 static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp) 2737 { 2738 char *buf; 2739 int ret; 2740 2741 buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL); 2742 if (!buf) { 2743 ret = 3; /* default */ 2744 } else if (!strcmp(buf, "0.10")) { 2745 ret = 2; 2746 } else if (!strcmp(buf, "1.1")) { 2747 ret = 3; 2748 } else { 2749 error_setg(errp, "Invalid compatibility level: '%s'", buf); 2750 ret = -EINVAL; 2751 } 2752 g_free(buf); 2753 return ret; 2754 } 2755 2756 static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version, 2757 Error **errp) 2758 { 2759 uint64_t refcount_bits; 2760 2761 refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16); 2762 if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) { 2763 error_setg(errp, "Refcount width must be a power of two and may not " 2764 "exceed 64 bits"); 2765 return 0; 2766 } 2767 2768 if (version < 3 && refcount_bits != 16) { 2769 error_setg(errp, "Different refcount widths than 16 bits require " 2770 "compatibility level 1.1 or above (use compat=1.1 or " 2771 "greater)"); 2772 return 0; 2773 } 2774 2775 return refcount_bits; 2776 } 2777 2778 static int coroutine_fn 2779 qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) 2780 { 2781 BlockdevCreateOptionsQcow2 *qcow2_opts; 2782 QDict *options; 2783 2784 /* 2785 * Open the image file and write a minimal qcow2 header. 2786 * 2787 * We keep things simple and start with a zero-sized image. We also 2788 * do without refcount blocks or a L1 table for now. We'll fix the 2789 * inconsistency later. 2790 * 2791 * We do need a refcount table because growing the refcount table means 2792 * allocating two new refcount blocks - the seconds of which would be at 2793 * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file 2794 * size for any qcow2 image. 2795 */ 2796 BlockBackend *blk = NULL; 2797 BlockDriverState *bs = NULL; 2798 QCowHeader *header; 2799 size_t cluster_size; 2800 int version; 2801 int refcount_order; 2802 uint64_t* refcount_table; 2803 Error *local_err = NULL; 2804 int ret; 2805 2806 assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2); 2807 qcow2_opts = &create_options->u.qcow2; 2808 2809 bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp); 2810 if (bs == NULL) { 2811 return -EIO; 2812 } 2813 2814 /* Validate options and set default values */ 2815 if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) { 2816 error_setg(errp, "Image size must be a multiple of 512 bytes"); 2817 ret = -EINVAL; 2818 goto out; 2819 } 2820 2821 if (qcow2_opts->has_version) { 2822 switch (qcow2_opts->version) { 2823 case BLOCKDEV_QCOW2_VERSION_V2: 2824 version = 2; 2825 break; 2826 case BLOCKDEV_QCOW2_VERSION_V3: 2827 version = 3; 2828 break; 2829 default: 2830 g_assert_not_reached(); 2831 } 2832 } else { 2833 version = 3; 2834 } 2835 2836 if (qcow2_opts->has_cluster_size) { 2837 cluster_size = qcow2_opts->cluster_size; 2838 } else { 2839 cluster_size = DEFAULT_CLUSTER_SIZE; 2840 } 2841 2842 if (!validate_cluster_size(cluster_size, errp)) { 2843 ret = -EINVAL; 2844 goto out; 2845 } 2846 2847 if (!qcow2_opts->has_preallocation) { 2848 qcow2_opts->preallocation = PREALLOC_MODE_OFF; 2849 } 2850 if (qcow2_opts->has_backing_file && 2851 qcow2_opts->preallocation != PREALLOC_MODE_OFF) 2852 { 2853 error_setg(errp, "Backing file and preallocation cannot be used at " 2854 "the same time"); 2855 ret = -EINVAL; 2856 goto out; 2857 } 2858 if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) { 2859 error_setg(errp, "Backing format cannot be used without backing file"); 2860 ret = -EINVAL; 2861 goto out; 2862 } 2863 2864 if (!qcow2_opts->has_lazy_refcounts) { 2865 qcow2_opts->lazy_refcounts = false; 2866 } 2867 if (version < 3 && qcow2_opts->lazy_refcounts) { 2868 error_setg(errp, "Lazy refcounts only supported with compatibility " 2869 "level 1.1 and above (use version=v3 or greater)"); 2870 ret = -EINVAL; 2871 goto out; 2872 } 2873 2874 if (!qcow2_opts->has_refcount_bits) { 2875 qcow2_opts->refcount_bits = 16; 2876 } 2877 if (qcow2_opts->refcount_bits > 64 || 2878 !is_power_of_2(qcow2_opts->refcount_bits)) 2879 { 2880 error_setg(errp, "Refcount width must be a power of two and may not " 2881 "exceed 64 bits"); 2882 ret = -EINVAL; 2883 goto out; 2884 } 2885 if (version < 3 && qcow2_opts->refcount_bits != 16) { 2886 error_setg(errp, "Different refcount widths than 16 bits require " 2887 "compatibility level 1.1 or above (use version=v3 or " 2888 "greater)"); 2889 ret = -EINVAL; 2890 goto out; 2891 } 2892 refcount_order = ctz32(qcow2_opts->refcount_bits); 2893 2894 2895 /* Create BlockBackend to write to the image */ 2896 blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); 2897 ret = blk_insert_bs(blk, bs, errp); 2898 if (ret < 0) { 2899 goto out; 2900 } 2901 blk_set_allow_write_beyond_eof(blk, true); 2902 2903 /* Clear the protocol layer and preallocate it if necessary */ 2904 ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp); 2905 if (ret < 0) { 2906 goto out; 2907 } 2908 2909 if (qcow2_opts->preallocation == PREALLOC_MODE_FULL || 2910 qcow2_opts->preallocation == PREALLOC_MODE_FALLOC) 2911 { 2912 int64_t prealloc_size = 2913 qcow2_calc_prealloc_size(qcow2_opts->size, cluster_size, 2914 refcount_order); 2915 2916 ret = blk_truncate(blk, prealloc_size, qcow2_opts->preallocation, errp); 2917 if (ret < 0) { 2918 goto out; 2919 } 2920 } 2921 2922 /* Write the header */ 2923 QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); 2924 header = g_malloc0(cluster_size); 2925 *header = (QCowHeader) { 2926 .magic = cpu_to_be32(QCOW_MAGIC), 2927 .version = cpu_to_be32(version), 2928 .cluster_bits = cpu_to_be32(ctz32(cluster_size)), 2929 .size = cpu_to_be64(0), 2930 .l1_table_offset = cpu_to_be64(0), 2931 .l1_size = cpu_to_be32(0), 2932 .refcount_table_offset = cpu_to_be64(cluster_size), 2933 .refcount_table_clusters = cpu_to_be32(1), 2934 .refcount_order = cpu_to_be32(refcount_order), 2935 .header_length = cpu_to_be32(sizeof(*header)), 2936 }; 2937 2938 /* We'll update this to correct value later */ 2939 header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 2940 2941 if (qcow2_opts->lazy_refcounts) { 2942 header->compatible_features |= 2943 cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); 2944 } 2945 2946 ret = blk_pwrite(blk, 0, header, cluster_size, 0); 2947 g_free(header); 2948 if (ret < 0) { 2949 error_setg_errno(errp, -ret, "Could not write qcow2 header"); 2950 goto out; 2951 } 2952 2953 /* Write a refcount table with one refcount block */ 2954 refcount_table = g_malloc0(2 * cluster_size); 2955 refcount_table[0] = cpu_to_be64(2 * cluster_size); 2956 ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0); 2957 g_free(refcount_table); 2958 2959 if (ret < 0) { 2960 error_setg_errno(errp, -ret, "Could not write refcount table"); 2961 goto out; 2962 } 2963 2964 blk_unref(blk); 2965 blk = NULL; 2966 2967 /* 2968 * And now open the image and make it consistent first (i.e. increase the 2969 * refcount of the cluster that is occupied by the header and the refcount 2970 * table) 2971 */ 2972 options = qdict_new(); 2973 qdict_put_str(options, "driver", "qcow2"); 2974 qdict_put_str(options, "file", bs->node_name); 2975 blk = blk_new_open(NULL, NULL, options, 2976 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, 2977 &local_err); 2978 if (blk == NULL) { 2979 error_propagate(errp, local_err); 2980 ret = -EIO; 2981 goto out; 2982 } 2983 2984 ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); 2985 if (ret < 0) { 2986 error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " 2987 "header and refcount table"); 2988 goto out; 2989 2990 } else if (ret != 0) { 2991 error_report("Huh, first cluster in empty image is already in use?"); 2992 abort(); 2993 } 2994 2995 /* Create a full header (including things like feature table) */ 2996 ret = qcow2_update_header(blk_bs(blk)); 2997 if (ret < 0) { 2998 error_setg_errno(errp, -ret, "Could not update qcow2 header"); 2999 goto out; 3000 } 3001 3002 /* Okay, now that we have a valid image, let's give it the right size */ 3003 ret = blk_truncate(blk, qcow2_opts->size, PREALLOC_MODE_OFF, errp); 3004 if (ret < 0) { 3005 error_prepend(errp, "Could not resize image: "); 3006 goto out; 3007 } 3008 3009 /* Want a backing file? There you go.*/ 3010 if (qcow2_opts->has_backing_file) { 3011 const char *backing_format = NULL; 3012 3013 if (qcow2_opts->has_backing_fmt) { 3014 backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt); 3015 } 3016 3017 ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file, 3018 backing_format); 3019 if (ret < 0) { 3020 error_setg_errno(errp, -ret, "Could not assign backing file '%s' " 3021 "with format '%s'", qcow2_opts->backing_file, 3022 backing_format); 3023 goto out; 3024 } 3025 } 3026 3027 /* Want encryption? There you go. */ 3028 if (qcow2_opts->has_encrypt) { 3029 ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp); 3030 if (ret < 0) { 3031 goto out; 3032 } 3033 } 3034 3035 /* And if we're supposed to preallocate metadata, do that now */ 3036 if (qcow2_opts->preallocation != PREALLOC_MODE_OFF) { 3037 ret = preallocate(blk_bs(blk), 0, qcow2_opts->size); 3038 if (ret < 0) { 3039 error_setg_errno(errp, -ret, "Could not preallocate metadata"); 3040 goto out; 3041 } 3042 } 3043 3044 blk_unref(blk); 3045 blk = NULL; 3046 3047 /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. 3048 * Using BDRV_O_NO_IO, since encryption is now setup we don't want to 3049 * have to setup decryption context. We're not doing any I/O on the top 3050 * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does 3051 * not have effect. 3052 */ 3053 options = qdict_new(); 3054 qdict_put_str(options, "driver", "qcow2"); 3055 qdict_put_str(options, "file", bs->node_name); 3056 blk = blk_new_open(NULL, NULL, options, 3057 BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO, 3058 &local_err); 3059 if (blk == NULL) { 3060 error_propagate(errp, local_err); 3061 ret = -EIO; 3062 goto out; 3063 } 3064 3065 ret = 0; 3066 out: 3067 blk_unref(blk); 3068 bdrv_unref(bs); 3069 return ret; 3070 } 3071 3072 static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts, 3073 Error **errp) 3074 { 3075 BlockdevCreateOptions *create_options = NULL; 3076 QDict *qdict = NULL; 3077 QObject *qobj; 3078 Visitor *v; 3079 BlockDriverState *bs = NULL; 3080 Error *local_err = NULL; 3081 const char *val; 3082 int ret; 3083 3084 /* Only the keyval visitor supports the dotted syntax needed for 3085 * encryption, so go through a QDict before getting a QAPI type. Ignore 3086 * options meant for the protocol layer so that the visitor doesn't 3087 * complain. */ 3088 qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts, 3089 true); 3090 3091 /* Handle encryption options */ 3092 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT); 3093 if (val && !strcmp(val, "on")) { 3094 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow"); 3095 } else if (val && !strcmp(val, "off")) { 3096 qdict_del(qdict, BLOCK_OPT_ENCRYPT); 3097 } 3098 3099 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT); 3100 if (val && !strcmp(val, "aes")) { 3101 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow"); 3102 } 3103 3104 /* Convert compat=0.10/1.1 into compat=v2/v3, to be renamed into 3105 * version=v2/v3 below. */ 3106 val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL); 3107 if (val && !strcmp(val, "0.10")) { 3108 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2"); 3109 } else if (val && !strcmp(val, "1.1")) { 3110 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3"); 3111 } 3112 3113 /* Change legacy command line options into QMP ones */ 3114 static const QDictRenames opt_renames[] = { 3115 { BLOCK_OPT_BACKING_FILE, "backing-file" }, 3116 { BLOCK_OPT_BACKING_FMT, "backing-fmt" }, 3117 { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" }, 3118 { BLOCK_OPT_LAZY_REFCOUNTS, "lazy-refcounts" }, 3119 { BLOCK_OPT_REFCOUNT_BITS, "refcount-bits" }, 3120 { BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT }, 3121 { BLOCK_OPT_COMPAT_LEVEL, "version" }, 3122 { NULL, NULL }, 3123 }; 3124 3125 if (!qdict_rename_keys(qdict, opt_renames, errp)) { 3126 ret = -EINVAL; 3127 goto finish; 3128 } 3129 3130 /* Create and open the file (protocol layer) */ 3131 ret = bdrv_create_file(filename, opts, errp); 3132 if (ret < 0) { 3133 goto finish; 3134 } 3135 3136 bs = bdrv_open(filename, NULL, NULL, 3137 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); 3138 if (bs == NULL) { 3139 ret = -EIO; 3140 goto finish; 3141 } 3142 3143 /* Set 'driver' and 'node' options */ 3144 qdict_put_str(qdict, "driver", "qcow2"); 3145 qdict_put_str(qdict, "file", bs->node_name); 3146 3147 /* Now get the QAPI type BlockdevCreateOptions */ 3148 qobj = qdict_crumple(qdict, errp); 3149 qobject_unref(qdict); 3150 qdict = qobject_to(QDict, qobj); 3151 if (qdict == NULL) { 3152 ret = -EINVAL; 3153 goto finish; 3154 } 3155 3156 v = qobject_input_visitor_new_keyval(QOBJECT(qdict)); 3157 visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err); 3158 visit_free(v); 3159 3160 if (local_err) { 3161 error_propagate(errp, local_err); 3162 ret = -EINVAL; 3163 goto finish; 3164 } 3165 3166 /* Silently round up size */ 3167 create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size, 3168 BDRV_SECTOR_SIZE); 3169 3170 /* Create the qcow2 image (format layer) */ 3171 ret = qcow2_co_create(create_options, errp); 3172 if (ret < 0) { 3173 goto finish; 3174 } 3175 3176 ret = 0; 3177 finish: 3178 qobject_unref(qdict); 3179 bdrv_unref(bs); 3180 qapi_free_BlockdevCreateOptions(create_options); 3181 return ret; 3182 } 3183 3184 3185 static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) 3186 { 3187 int64_t nr; 3188 int res; 3189 3190 /* Clamp to image length, before checking status of underlying sectors */ 3191 if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { 3192 bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset; 3193 } 3194 3195 if (!bytes) { 3196 return true; 3197 } 3198 res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL); 3199 return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes; 3200 } 3201 3202 static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, 3203 int64_t offset, int bytes, BdrvRequestFlags flags) 3204 { 3205 int ret; 3206 BDRVQcow2State *s = bs->opaque; 3207 3208 uint32_t head = offset % s->cluster_size; 3209 uint32_t tail = (offset + bytes) % s->cluster_size; 3210 3211 trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes); 3212 if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) { 3213 tail = 0; 3214 } 3215 3216 if (head || tail) { 3217 uint64_t off; 3218 unsigned int nr; 3219 3220 assert(head + bytes <= s->cluster_size); 3221 3222 /* check whether remainder of cluster already reads as zero */ 3223 if (!(is_zero(bs, offset - head, head) && 3224 is_zero(bs, offset + bytes, 3225 tail ? s->cluster_size - tail : 0))) { 3226 return -ENOTSUP; 3227 } 3228 3229 qemu_co_mutex_lock(&s->lock); 3230 /* We can have new write after previous check */ 3231 offset = QEMU_ALIGN_DOWN(offset, s->cluster_size); 3232 bytes = s->cluster_size; 3233 nr = s->cluster_size; 3234 ret = qcow2_get_cluster_offset(bs, offset, &nr, &off); 3235 if (ret != QCOW2_CLUSTER_UNALLOCATED && 3236 ret != QCOW2_CLUSTER_ZERO_PLAIN && 3237 ret != QCOW2_CLUSTER_ZERO_ALLOC) { 3238 qemu_co_mutex_unlock(&s->lock); 3239 return -ENOTSUP; 3240 } 3241 } else { 3242 qemu_co_mutex_lock(&s->lock); 3243 } 3244 3245 trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes); 3246 3247 /* Whatever is left can use real zero clusters */ 3248 ret = qcow2_cluster_zeroize(bs, offset, bytes, flags); 3249 qemu_co_mutex_unlock(&s->lock); 3250 3251 return ret; 3252 } 3253 3254 static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, 3255 int64_t offset, int bytes) 3256 { 3257 int ret; 3258 BDRVQcow2State *s = bs->opaque; 3259 3260 if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) { 3261 assert(bytes < s->cluster_size); 3262 /* Ignore partial clusters, except for the special case of the 3263 * complete partial cluster at the end of an unaligned file */ 3264 if (!QEMU_IS_ALIGNED(offset, s->cluster_size) || 3265 offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) { 3266 return -ENOTSUP; 3267 } 3268 } 3269 3270 qemu_co_mutex_lock(&s->lock); 3271 ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST, 3272 false); 3273 qemu_co_mutex_unlock(&s->lock); 3274 return ret; 3275 } 3276 3277 static int qcow2_truncate(BlockDriverState *bs, int64_t offset, 3278 PreallocMode prealloc, Error **errp) 3279 { 3280 BDRVQcow2State *s = bs->opaque; 3281 uint64_t old_length; 3282 int64_t new_l1_size; 3283 int ret; 3284 3285 if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA && 3286 prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL) 3287 { 3288 error_setg(errp, "Unsupported preallocation mode '%s'", 3289 PreallocMode_str(prealloc)); 3290 return -ENOTSUP; 3291 } 3292 3293 if (offset & 511) { 3294 error_setg(errp, "The new size must be a multiple of 512"); 3295 return -EINVAL; 3296 } 3297 3298 /* cannot proceed if image has snapshots */ 3299 if (s->nb_snapshots) { 3300 error_setg(errp, "Can't resize an image which has snapshots"); 3301 return -ENOTSUP; 3302 } 3303 3304 /* cannot proceed if image has bitmaps */ 3305 if (s->nb_bitmaps) { 3306 /* TODO: resize bitmaps in the image */ 3307 error_setg(errp, "Can't resize an image which has bitmaps"); 3308 return -ENOTSUP; 3309 } 3310 3311 old_length = bs->total_sectors * 512; 3312 new_l1_size = size_to_l1(s, offset); 3313 3314 if (offset < old_length) { 3315 int64_t last_cluster, old_file_size; 3316 if (prealloc != PREALLOC_MODE_OFF) { 3317 error_setg(errp, 3318 "Preallocation can't be used for shrinking an image"); 3319 return -EINVAL; 3320 } 3321 3322 ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size), 3323 old_length - ROUND_UP(offset, 3324 s->cluster_size), 3325 QCOW2_DISCARD_ALWAYS, true); 3326 if (ret < 0) { 3327 error_setg_errno(errp, -ret, "Failed to discard cropped clusters"); 3328 return ret; 3329 } 3330 3331 ret = qcow2_shrink_l1_table(bs, new_l1_size); 3332 if (ret < 0) { 3333 error_setg_errno(errp, -ret, 3334 "Failed to reduce the number of L2 tables"); 3335 return ret; 3336 } 3337 3338 ret = qcow2_shrink_reftable(bs); 3339 if (ret < 0) { 3340 error_setg_errno(errp, -ret, 3341 "Failed to discard unused refblocks"); 3342 return ret; 3343 } 3344 3345 old_file_size = bdrv_getlength(bs->file->bs); 3346 if (old_file_size < 0) { 3347 error_setg_errno(errp, -old_file_size, 3348 "Failed to inquire current file length"); 3349 return old_file_size; 3350 } 3351 last_cluster = qcow2_get_last_cluster(bs, old_file_size); 3352 if (last_cluster < 0) { 3353 error_setg_errno(errp, -last_cluster, 3354 "Failed to find the last cluster"); 3355 return last_cluster; 3356 } 3357 if ((last_cluster + 1) * s->cluster_size < old_file_size) { 3358 Error *local_err = NULL; 3359 3360 bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size, 3361 PREALLOC_MODE_OFF, &local_err); 3362 if (local_err) { 3363 warn_reportf_err(local_err, 3364 "Failed to truncate the tail of the image: "); 3365 } 3366 } 3367 } else { 3368 ret = qcow2_grow_l1_table(bs, new_l1_size, true); 3369 if (ret < 0) { 3370 error_setg_errno(errp, -ret, "Failed to grow the L1 table"); 3371 return ret; 3372 } 3373 } 3374 3375 switch (prealloc) { 3376 case PREALLOC_MODE_OFF: 3377 break; 3378 3379 case PREALLOC_MODE_METADATA: 3380 ret = preallocate(bs, old_length, offset); 3381 if (ret < 0) { 3382 error_setg_errno(errp, -ret, "Preallocation failed"); 3383 return ret; 3384 } 3385 break; 3386 3387 case PREALLOC_MODE_FALLOC: 3388 case PREALLOC_MODE_FULL: 3389 { 3390 int64_t allocation_start, host_offset, guest_offset; 3391 int64_t clusters_allocated; 3392 int64_t old_file_size, new_file_size; 3393 uint64_t nb_new_data_clusters, nb_new_l2_tables; 3394 3395 old_file_size = bdrv_getlength(bs->file->bs); 3396 if (old_file_size < 0) { 3397 error_setg_errno(errp, -old_file_size, 3398 "Failed to inquire current file length"); 3399 return old_file_size; 3400 } 3401 old_file_size = ROUND_UP(old_file_size, s->cluster_size); 3402 3403 nb_new_data_clusters = DIV_ROUND_UP(offset - old_length, 3404 s->cluster_size); 3405 3406 /* This is an overestimation; we will not actually allocate space for 3407 * these in the file but just make sure the new refcount structures are 3408 * able to cover them so we will not have to allocate new refblocks 3409 * while entering the data blocks in the potentially new L2 tables. 3410 * (We do not actually care where the L2 tables are placed. Maybe they 3411 * are already allocated or they can be placed somewhere before 3412 * @old_file_size. It does not matter because they will be fully 3413 * allocated automatically, so they do not need to be covered by the 3414 * preallocation. All that matters is that we will not have to allocate 3415 * new refcount structures for them.) */ 3416 nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters, 3417 s->cluster_size / sizeof(uint64_t)); 3418 /* The cluster range may not be aligned to L2 boundaries, so add one L2 3419 * table for a potential head/tail */ 3420 nb_new_l2_tables++; 3421 3422 allocation_start = qcow2_refcount_area(bs, old_file_size, 3423 nb_new_data_clusters + 3424 nb_new_l2_tables, 3425 true, 0, 0); 3426 if (allocation_start < 0) { 3427 error_setg_errno(errp, -allocation_start, 3428 "Failed to resize refcount structures"); 3429 return allocation_start; 3430 } 3431 3432 clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start, 3433 nb_new_data_clusters); 3434 if (clusters_allocated < 0) { 3435 error_setg_errno(errp, -clusters_allocated, 3436 "Failed to allocate data clusters"); 3437 return -clusters_allocated; 3438 } 3439 3440 assert(clusters_allocated == nb_new_data_clusters); 3441 3442 /* Allocate the data area */ 3443 new_file_size = allocation_start + 3444 nb_new_data_clusters * s->cluster_size; 3445 ret = bdrv_truncate(bs->file, new_file_size, prealloc, errp); 3446 if (ret < 0) { 3447 error_prepend(errp, "Failed to resize underlying file: "); 3448 qcow2_free_clusters(bs, allocation_start, 3449 nb_new_data_clusters * s->cluster_size, 3450 QCOW2_DISCARD_OTHER); 3451 return ret; 3452 } 3453 3454 /* Create the necessary L2 entries */ 3455 host_offset = allocation_start; 3456 guest_offset = old_length; 3457 while (nb_new_data_clusters) { 3458 int64_t nb_clusters = MIN( 3459 nb_new_data_clusters, 3460 s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset)); 3461 QCowL2Meta allocation = { 3462 .offset = guest_offset, 3463 .alloc_offset = host_offset, 3464 .nb_clusters = nb_clusters, 3465 }; 3466 qemu_co_queue_init(&allocation.dependent_requests); 3467 3468 ret = qcow2_alloc_cluster_link_l2(bs, &allocation); 3469 if (ret < 0) { 3470 error_setg_errno(errp, -ret, "Failed to update L2 tables"); 3471 qcow2_free_clusters(bs, host_offset, 3472 nb_new_data_clusters * s->cluster_size, 3473 QCOW2_DISCARD_OTHER); 3474 return ret; 3475 } 3476 3477 guest_offset += nb_clusters * s->cluster_size; 3478 host_offset += nb_clusters * s->cluster_size; 3479 nb_new_data_clusters -= nb_clusters; 3480 } 3481 break; 3482 } 3483 3484 default: 3485 g_assert_not_reached(); 3486 } 3487 3488 if (prealloc != PREALLOC_MODE_OFF) { 3489 /* Flush metadata before actually changing the image size */ 3490 ret = bdrv_flush(bs); 3491 if (ret < 0) { 3492 error_setg_errno(errp, -ret, 3493 "Failed to flush the preallocated area to disk"); 3494 return ret; 3495 } 3496 } 3497 3498 /* write updated header.size */ 3499 offset = cpu_to_be64(offset); 3500 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), 3501 &offset, sizeof(uint64_t)); 3502 if (ret < 0) { 3503 error_setg_errno(errp, -ret, "Failed to update the image size"); 3504 return ret; 3505 } 3506 3507 s->l1_vm_state_index = new_l1_size; 3508 return 0; 3509 } 3510 3511 /* XXX: put compressed sectors first, then all the cluster aligned 3512 tables to avoid losing bytes in alignment */ 3513 static coroutine_fn int 3514 qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, 3515 uint64_t bytes, QEMUIOVector *qiov) 3516 { 3517 BDRVQcow2State *s = bs->opaque; 3518 QEMUIOVector hd_qiov; 3519 struct iovec iov; 3520 z_stream strm; 3521 int ret, out_len; 3522 uint8_t *buf, *out_buf; 3523 int64_t cluster_offset; 3524 3525 if (bytes == 0) { 3526 /* align end of file to a sector boundary to ease reading with 3527 sector based I/Os */ 3528 cluster_offset = bdrv_getlength(bs->file->bs); 3529 if (cluster_offset < 0) { 3530 return cluster_offset; 3531 } 3532 return bdrv_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, NULL); 3533 } 3534 3535 if (offset_into_cluster(s, offset)) { 3536 return -EINVAL; 3537 } 3538 3539 buf = qemu_blockalign(bs, s->cluster_size); 3540 if (bytes != s->cluster_size) { 3541 if (bytes > s->cluster_size || 3542 offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS) 3543 { 3544 qemu_vfree(buf); 3545 return -EINVAL; 3546 } 3547 /* Zero-pad last write if image size is not cluster aligned */ 3548 memset(buf + bytes, 0, s->cluster_size - bytes); 3549 } 3550 qemu_iovec_to_buf(qiov, 0, buf, bytes); 3551 3552 out_buf = g_malloc(s->cluster_size); 3553 3554 /* best compression, small window, no zlib header */ 3555 memset(&strm, 0, sizeof(strm)); 3556 ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, 3557 Z_DEFLATED, -12, 3558 9, Z_DEFAULT_STRATEGY); 3559 if (ret != 0) { 3560 ret = -EINVAL; 3561 goto fail; 3562 } 3563 3564 strm.avail_in = s->cluster_size; 3565 strm.next_in = (uint8_t *)buf; 3566 strm.avail_out = s->cluster_size; 3567 strm.next_out = out_buf; 3568 3569 ret = deflate(&strm, Z_FINISH); 3570 if (ret != Z_STREAM_END && ret != Z_OK) { 3571 deflateEnd(&strm); 3572 ret = -EINVAL; 3573 goto fail; 3574 } 3575 out_len = strm.next_out - out_buf; 3576 3577 deflateEnd(&strm); 3578 3579 if (ret != Z_STREAM_END || out_len >= s->cluster_size) { 3580 /* could not compress: write normal cluster */ 3581 ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0); 3582 if (ret < 0) { 3583 goto fail; 3584 } 3585 goto success; 3586 } 3587 3588 qemu_co_mutex_lock(&s->lock); 3589 cluster_offset = 3590 qcow2_alloc_compressed_cluster_offset(bs, offset, out_len); 3591 if (!cluster_offset) { 3592 qemu_co_mutex_unlock(&s->lock); 3593 ret = -EIO; 3594 goto fail; 3595 } 3596 cluster_offset &= s->cluster_offset_mask; 3597 3598 ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len); 3599 qemu_co_mutex_unlock(&s->lock); 3600 if (ret < 0) { 3601 goto fail; 3602 } 3603 3604 iov = (struct iovec) { 3605 .iov_base = out_buf, 3606 .iov_len = out_len, 3607 }; 3608 qemu_iovec_init_external(&hd_qiov, &iov, 1); 3609 3610 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); 3611 ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0); 3612 if (ret < 0) { 3613 goto fail; 3614 } 3615 success: 3616 ret = 0; 3617 fail: 3618 qemu_vfree(buf); 3619 g_free(out_buf); 3620 return ret; 3621 } 3622 3623 static int make_completely_empty(BlockDriverState *bs) 3624 { 3625 BDRVQcow2State *s = bs->opaque; 3626 Error *local_err = NULL; 3627 int ret, l1_clusters; 3628 int64_t offset; 3629 uint64_t *new_reftable = NULL; 3630 uint64_t rt_entry, l1_size2; 3631 struct { 3632 uint64_t l1_offset; 3633 uint64_t reftable_offset; 3634 uint32_t reftable_clusters; 3635 } QEMU_PACKED l1_ofs_rt_ofs_cls; 3636 3637 ret = qcow2_cache_empty(bs, s->l2_table_cache); 3638 if (ret < 0) { 3639 goto fail; 3640 } 3641 3642 ret = qcow2_cache_empty(bs, s->refcount_block_cache); 3643 if (ret < 0) { 3644 goto fail; 3645 } 3646 3647 /* Refcounts will be broken utterly */ 3648 ret = qcow2_mark_dirty(bs); 3649 if (ret < 0) { 3650 goto fail; 3651 } 3652 3653 BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 3654 3655 l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); 3656 l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t); 3657 3658 /* After this call, neither the in-memory nor the on-disk refcount 3659 * information accurately describe the actual references */ 3660 3661 ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset, 3662 l1_clusters * s->cluster_size, 0); 3663 if (ret < 0) { 3664 goto fail_broken_refcounts; 3665 } 3666 memset(s->l1_table, 0, l1_size2); 3667 3668 BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE); 3669 3670 /* Overwrite enough clusters at the beginning of the sectors to place 3671 * the refcount table, a refcount block and the L1 table in; this may 3672 * overwrite parts of the existing refcount and L1 table, which is not 3673 * an issue because the dirty flag is set, complete data loss is in fact 3674 * desired and partial data loss is consequently fine as well */ 3675 ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size, 3676 (2 + l1_clusters) * s->cluster_size, 0); 3677 /* This call (even if it failed overall) may have overwritten on-disk 3678 * refcount structures; in that case, the in-memory refcount information 3679 * will probably differ from the on-disk information which makes the BDS 3680 * unusable */ 3681 if (ret < 0) { 3682 goto fail_broken_refcounts; 3683 } 3684 3685 BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 3686 BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE); 3687 3688 /* "Create" an empty reftable (one cluster) directly after the image 3689 * header and an empty L1 table three clusters after the image header; 3690 * the cluster between those two will be used as the first refblock */ 3691 l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size); 3692 l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size); 3693 l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1); 3694 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset), 3695 &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls)); 3696 if (ret < 0) { 3697 goto fail_broken_refcounts; 3698 } 3699 3700 s->l1_table_offset = 3 * s->cluster_size; 3701 3702 new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t)); 3703 if (!new_reftable) { 3704 ret = -ENOMEM; 3705 goto fail_broken_refcounts; 3706 } 3707 3708 s->refcount_table_offset = s->cluster_size; 3709 s->refcount_table_size = s->cluster_size / sizeof(uint64_t); 3710 s->max_refcount_table_index = 0; 3711 3712 g_free(s->refcount_table); 3713 s->refcount_table = new_reftable; 3714 new_reftable = NULL; 3715 3716 /* Now the in-memory refcount information again corresponds to the on-disk 3717 * information (reftable is empty and no refblocks (the refblock cache is 3718 * empty)); however, this means some clusters (e.g. the image header) are 3719 * referenced, but not refcounted, but the normal qcow2 code assumes that 3720 * the in-memory information is always correct */ 3721 3722 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); 3723 3724 /* Enter the first refblock into the reftable */ 3725 rt_entry = cpu_to_be64(2 * s->cluster_size); 3726 ret = bdrv_pwrite_sync(bs->file, s->cluster_size, 3727 &rt_entry, sizeof(rt_entry)); 3728 if (ret < 0) { 3729 goto fail_broken_refcounts; 3730 } 3731 s->refcount_table[0] = 2 * s->cluster_size; 3732 3733 s->free_cluster_index = 0; 3734 assert(3 + l1_clusters <= s->refcount_block_size); 3735 offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2); 3736 if (offset < 0) { 3737 ret = offset; 3738 goto fail_broken_refcounts; 3739 } else if (offset > 0) { 3740 error_report("First cluster in emptied image is in use"); 3741 abort(); 3742 } 3743 3744 /* Now finally the in-memory information corresponds to the on-disk 3745 * structures and is correct */ 3746 ret = qcow2_mark_clean(bs); 3747 if (ret < 0) { 3748 goto fail; 3749 } 3750 3751 ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, 3752 PREALLOC_MODE_OFF, &local_err); 3753 if (ret < 0) { 3754 error_report_err(local_err); 3755 goto fail; 3756 } 3757 3758 return 0; 3759 3760 fail_broken_refcounts: 3761 /* The BDS is unusable at this point. If we wanted to make it usable, we 3762 * would have to call qcow2_refcount_close(), qcow2_refcount_init(), 3763 * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init() 3764 * again. However, because the functions which could have caused this error 3765 * path to be taken are used by those functions as well, it's very likely 3766 * that that sequence will fail as well. Therefore, just eject the BDS. */ 3767 bs->drv = NULL; 3768 3769 fail: 3770 g_free(new_reftable); 3771 return ret; 3772 } 3773 3774 static int qcow2_make_empty(BlockDriverState *bs) 3775 { 3776 BDRVQcow2State *s = bs->opaque; 3777 uint64_t offset, end_offset; 3778 int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size); 3779 int l1_clusters, ret = 0; 3780 3781 l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); 3782 3783 if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps && 3784 3 + l1_clusters <= s->refcount_block_size && 3785 s->crypt_method_header != QCOW_CRYPT_LUKS) { 3786 /* The following function only works for qcow2 v3 images (it 3787 * requires the dirty flag) and only as long as there are no 3788 * features that reserve extra clusters (such as snapshots, 3789 * LUKS header, or persistent bitmaps), because it completely 3790 * empties the image. Furthermore, the L1 table and three 3791 * additional clusters (image header, refcount table, one 3792 * refcount block) have to fit inside one refcount block. */ 3793 return make_completely_empty(bs); 3794 } 3795 3796 /* This fallback code simply discards every active cluster; this is slow, 3797 * but works in all cases */ 3798 end_offset = bs->total_sectors * BDRV_SECTOR_SIZE; 3799 for (offset = 0; offset < end_offset; offset += step) { 3800 /* As this function is generally used after committing an external 3801 * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the 3802 * default action for this kind of discard is to pass the discard, 3803 * which will ideally result in an actually smaller image file, as 3804 * is probably desired. */ 3805 ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset), 3806 QCOW2_DISCARD_SNAPSHOT, true); 3807 if (ret < 0) { 3808 break; 3809 } 3810 } 3811 3812 return ret; 3813 } 3814 3815 static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) 3816 { 3817 BDRVQcow2State *s = bs->opaque; 3818 int ret; 3819 3820 qemu_co_mutex_lock(&s->lock); 3821 ret = qcow2_write_caches(bs); 3822 qemu_co_mutex_unlock(&s->lock); 3823 3824 return ret; 3825 } 3826 3827 static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, 3828 Error **errp) 3829 { 3830 Error *local_err = NULL; 3831 BlockMeasureInfo *info; 3832 uint64_t required = 0; /* bytes that contribute to required size */ 3833 uint64_t virtual_size; /* disk size as seen by guest */ 3834 uint64_t refcount_bits; 3835 uint64_t l2_tables; 3836 size_t cluster_size; 3837 int version; 3838 char *optstr; 3839 PreallocMode prealloc; 3840 bool has_backing_file; 3841 3842 /* Parse image creation options */ 3843 cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err); 3844 if (local_err) { 3845 goto err; 3846 } 3847 3848 version = qcow2_opt_get_version_del(opts, &local_err); 3849 if (local_err) { 3850 goto err; 3851 } 3852 3853 refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err); 3854 if (local_err) { 3855 goto err; 3856 } 3857 3858 optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 3859 prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr, 3860 PREALLOC_MODE_OFF, &local_err); 3861 g_free(optstr); 3862 if (local_err) { 3863 goto err; 3864 } 3865 3866 optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); 3867 has_backing_file = !!optstr; 3868 g_free(optstr); 3869 3870 virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); 3871 virtual_size = ROUND_UP(virtual_size, cluster_size); 3872 3873 /* Check that virtual disk size is valid */ 3874 l2_tables = DIV_ROUND_UP(virtual_size / cluster_size, 3875 cluster_size / sizeof(uint64_t)); 3876 if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) { 3877 error_setg(&local_err, "The image size is too large " 3878 "(try using a larger cluster size)"); 3879 goto err; 3880 } 3881 3882 /* Account for input image */ 3883 if (in_bs) { 3884 int64_t ssize = bdrv_getlength(in_bs); 3885 if (ssize < 0) { 3886 error_setg_errno(&local_err, -ssize, 3887 "Unable to get image virtual_size"); 3888 goto err; 3889 } 3890 3891 virtual_size = ROUND_UP(ssize, cluster_size); 3892 3893 if (has_backing_file) { 3894 /* We don't how much of the backing chain is shared by the input 3895 * image and the new image file. In the worst case the new image's 3896 * backing file has nothing in common with the input image. Be 3897 * conservative and assume all clusters need to be written. 3898 */ 3899 required = virtual_size; 3900 } else { 3901 int64_t offset; 3902 int64_t pnum = 0; 3903 3904 for (offset = 0; offset < ssize; offset += pnum) { 3905 int ret; 3906 3907 ret = bdrv_block_status_above(in_bs, NULL, offset, 3908 ssize - offset, &pnum, NULL, 3909 NULL); 3910 if (ret < 0) { 3911 error_setg_errno(&local_err, -ret, 3912 "Unable to get block status"); 3913 goto err; 3914 } 3915 3916 if (ret & BDRV_BLOCK_ZERO) { 3917 /* Skip zero regions (safe with no backing file) */ 3918 } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) == 3919 (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) { 3920 /* Extend pnum to end of cluster for next iteration */ 3921 pnum = ROUND_UP(offset + pnum, cluster_size) - offset; 3922 3923 /* Count clusters we've seen */ 3924 required += offset % cluster_size + pnum; 3925 } 3926 } 3927 } 3928 } 3929 3930 /* Take into account preallocation. Nothing special is needed for 3931 * PREALLOC_MODE_METADATA since metadata is always counted. 3932 */ 3933 if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { 3934 required = virtual_size; 3935 } 3936 3937 info = g_new(BlockMeasureInfo, 1); 3938 info->fully_allocated = 3939 qcow2_calc_prealloc_size(virtual_size, cluster_size, 3940 ctz32(refcount_bits)); 3941 3942 /* Remove data clusters that are not required. This overestimates the 3943 * required size because metadata needed for the fully allocated file is 3944 * still counted. 3945 */ 3946 info->required = info->fully_allocated - virtual_size + required; 3947 return info; 3948 3949 err: 3950 error_propagate(errp, local_err); 3951 return NULL; 3952 } 3953 3954 static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 3955 { 3956 BDRVQcow2State *s = bs->opaque; 3957 bdi->unallocated_blocks_are_zero = true; 3958 bdi->cluster_size = s->cluster_size; 3959 bdi->vm_state_offset = qcow2_vm_state_offset(s); 3960 return 0; 3961 } 3962 3963 static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) 3964 { 3965 BDRVQcow2State *s = bs->opaque; 3966 ImageInfoSpecific *spec_info; 3967 QCryptoBlockInfo *encrypt_info = NULL; 3968 3969 if (s->crypto != NULL) { 3970 encrypt_info = qcrypto_block_get_info(s->crypto, &error_abort); 3971 } 3972 3973 spec_info = g_new(ImageInfoSpecific, 1); 3974 *spec_info = (ImageInfoSpecific){ 3975 .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, 3976 .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1), 3977 }; 3978 if (s->qcow_version == 2) { 3979 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ 3980 .compat = g_strdup("0.10"), 3981 .refcount_bits = s->refcount_bits, 3982 }; 3983 } else if (s->qcow_version == 3) { 3984 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ 3985 .compat = g_strdup("1.1"), 3986 .lazy_refcounts = s->compatible_features & 3987 QCOW2_COMPAT_LAZY_REFCOUNTS, 3988 .has_lazy_refcounts = true, 3989 .corrupt = s->incompatible_features & 3990 QCOW2_INCOMPAT_CORRUPT, 3991 .has_corrupt = true, 3992 .refcount_bits = s->refcount_bits, 3993 }; 3994 } else { 3995 /* if this assertion fails, this probably means a new version was 3996 * added without having it covered here */ 3997 assert(false); 3998 } 3999 4000 if (encrypt_info) { 4001 ImageInfoSpecificQCow2Encryption *qencrypt = 4002 g_new(ImageInfoSpecificQCow2Encryption, 1); 4003 switch (encrypt_info->format) { 4004 case Q_CRYPTO_BLOCK_FORMAT_QCOW: 4005 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES; 4006 qencrypt->u.aes = encrypt_info->u.qcow; 4007 break; 4008 case Q_CRYPTO_BLOCK_FORMAT_LUKS: 4009 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS; 4010 qencrypt->u.luks = encrypt_info->u.luks; 4011 break; 4012 default: 4013 abort(); 4014 } 4015 /* Since we did shallow copy above, erase any pointers 4016 * in the original info */ 4017 memset(&encrypt_info->u, 0, sizeof(encrypt_info->u)); 4018 qapi_free_QCryptoBlockInfo(encrypt_info); 4019 4020 spec_info->u.qcow2.data->has_encrypt = true; 4021 spec_info->u.qcow2.data->encrypt = qencrypt; 4022 } 4023 4024 return spec_info; 4025 } 4026 4027 static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 4028 int64_t pos) 4029 { 4030 BDRVQcow2State *s = bs->opaque; 4031 4032 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); 4033 return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos, 4034 qiov->size, qiov, 0); 4035 } 4036 4037 static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 4038 int64_t pos) 4039 { 4040 BDRVQcow2State *s = bs->opaque; 4041 4042 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); 4043 return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos, 4044 qiov->size, qiov, 0); 4045 } 4046 4047 /* 4048 * Downgrades an image's version. To achieve this, any incompatible features 4049 * have to be removed. 4050 */ 4051 static int qcow2_downgrade(BlockDriverState *bs, int target_version, 4052 BlockDriverAmendStatusCB *status_cb, void *cb_opaque) 4053 { 4054 BDRVQcow2State *s = bs->opaque; 4055 int current_version = s->qcow_version; 4056 int ret; 4057 4058 if (target_version == current_version) { 4059 return 0; 4060 } else if (target_version > current_version) { 4061 return -EINVAL; 4062 } else if (target_version != 2) { 4063 return -EINVAL; 4064 } 4065 4066 if (s->refcount_order != 4) { 4067 error_report("compat=0.10 requires refcount_bits=16"); 4068 return -ENOTSUP; 4069 } 4070 4071 /* clear incompatible features */ 4072 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 4073 ret = qcow2_mark_clean(bs); 4074 if (ret < 0) { 4075 return ret; 4076 } 4077 } 4078 4079 /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in 4080 * the first place; if that happens nonetheless, returning -ENOTSUP is the 4081 * best thing to do anyway */ 4082 4083 if (s->incompatible_features) { 4084 return -ENOTSUP; 4085 } 4086 4087 /* since we can ignore compatible features, we can set them to 0 as well */ 4088 s->compatible_features = 0; 4089 /* if lazy refcounts have been used, they have already been fixed through 4090 * clearing the dirty flag */ 4091 4092 /* clearing autoclear features is trivial */ 4093 s->autoclear_features = 0; 4094 4095 ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque); 4096 if (ret < 0) { 4097 return ret; 4098 } 4099 4100 s->qcow_version = target_version; 4101 ret = qcow2_update_header(bs); 4102 if (ret < 0) { 4103 s->qcow_version = current_version; 4104 return ret; 4105 } 4106 return 0; 4107 } 4108 4109 typedef enum Qcow2AmendOperation { 4110 /* This is the value Qcow2AmendHelperCBInfo::last_operation will be 4111 * statically initialized to so that the helper CB can discern the first 4112 * invocation from an operation change */ 4113 QCOW2_NO_OPERATION = 0, 4114 4115 QCOW2_CHANGING_REFCOUNT_ORDER, 4116 QCOW2_DOWNGRADING, 4117 } Qcow2AmendOperation; 4118 4119 typedef struct Qcow2AmendHelperCBInfo { 4120 /* The code coordinating the amend operations should only modify 4121 * these four fields; the rest will be managed by the CB */ 4122 BlockDriverAmendStatusCB *original_status_cb; 4123 void *original_cb_opaque; 4124 4125 Qcow2AmendOperation current_operation; 4126 4127 /* Total number of operations to perform (only set once) */ 4128 int total_operations; 4129 4130 /* The following fields are managed by the CB */ 4131 4132 /* Number of operations completed */ 4133 int operations_completed; 4134 4135 /* Cumulative offset of all completed operations */ 4136 int64_t offset_completed; 4137 4138 Qcow2AmendOperation last_operation; 4139 int64_t last_work_size; 4140 } Qcow2AmendHelperCBInfo; 4141 4142 static void qcow2_amend_helper_cb(BlockDriverState *bs, 4143 int64_t operation_offset, 4144 int64_t operation_work_size, void *opaque) 4145 { 4146 Qcow2AmendHelperCBInfo *info = opaque; 4147 int64_t current_work_size; 4148 int64_t projected_work_size; 4149 4150 if (info->current_operation != info->last_operation) { 4151 if (info->last_operation != QCOW2_NO_OPERATION) { 4152 info->offset_completed += info->last_work_size; 4153 info->operations_completed++; 4154 } 4155 4156 info->last_operation = info->current_operation; 4157 } 4158 4159 assert(info->total_operations > 0); 4160 assert(info->operations_completed < info->total_operations); 4161 4162 info->last_work_size = operation_work_size; 4163 4164 current_work_size = info->offset_completed + operation_work_size; 4165 4166 /* current_work_size is the total work size for (operations_completed + 1) 4167 * operations (which includes this one), so multiply it by the number of 4168 * operations not covered and divide it by the number of operations 4169 * covered to get a projection for the operations not covered */ 4170 projected_work_size = current_work_size * (info->total_operations - 4171 info->operations_completed - 1) 4172 / (info->operations_completed + 1); 4173 4174 info->original_status_cb(bs, info->offset_completed + operation_offset, 4175 current_work_size + projected_work_size, 4176 info->original_cb_opaque); 4177 } 4178 4179 static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, 4180 BlockDriverAmendStatusCB *status_cb, 4181 void *cb_opaque) 4182 { 4183 BDRVQcow2State *s = bs->opaque; 4184 int old_version = s->qcow_version, new_version = old_version; 4185 uint64_t new_size = 0; 4186 const char *backing_file = NULL, *backing_format = NULL; 4187 bool lazy_refcounts = s->use_lazy_refcounts; 4188 const char *compat = NULL; 4189 uint64_t cluster_size = s->cluster_size; 4190 bool encrypt; 4191 int encformat; 4192 int refcount_bits = s->refcount_bits; 4193 Error *local_err = NULL; 4194 int ret; 4195 QemuOptDesc *desc = opts->list->desc; 4196 Qcow2AmendHelperCBInfo helper_cb_info; 4197 4198 while (desc && desc->name) { 4199 if (!qemu_opt_find(opts, desc->name)) { 4200 /* only change explicitly defined options */ 4201 desc++; 4202 continue; 4203 } 4204 4205 if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) { 4206 compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL); 4207 if (!compat) { 4208 /* preserve default */ 4209 } else if (!strcmp(compat, "0.10")) { 4210 new_version = 2; 4211 } else if (!strcmp(compat, "1.1")) { 4212 new_version = 3; 4213 } else { 4214 error_report("Unknown compatibility level %s", compat); 4215 return -EINVAL; 4216 } 4217 } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) { 4218 error_report("Cannot change preallocation mode"); 4219 return -ENOTSUP; 4220 } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { 4221 new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 4222 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) { 4223 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 4224 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) { 4225 backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 4226 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) { 4227 encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, 4228 !!s->crypto); 4229 4230 if (encrypt != !!s->crypto) { 4231 error_report("Changing the encryption flag is not supported"); 4232 return -ENOTSUP; 4233 } 4234 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) { 4235 encformat = qcow2_crypt_method_from_format( 4236 qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT)); 4237 4238 if (encformat != s->crypt_method_header) { 4239 error_report("Changing the encryption format is not supported"); 4240 return -ENOTSUP; 4241 } 4242 } else if (g_str_has_prefix(desc->name, "encrypt.")) { 4243 error_report("Changing the encryption parameters is not supported"); 4244 return -ENOTSUP; 4245 } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { 4246 cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 4247 cluster_size); 4248 if (cluster_size != s->cluster_size) { 4249 error_report("Changing the cluster size is not supported"); 4250 return -ENOTSUP; 4251 } 4252 } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { 4253 lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, 4254 lazy_refcounts); 4255 } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { 4256 refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS, 4257 refcount_bits); 4258 4259 if (refcount_bits <= 0 || refcount_bits > 64 || 4260 !is_power_of_2(refcount_bits)) 4261 { 4262 error_report("Refcount width must be a power of two and may " 4263 "not exceed 64 bits"); 4264 return -EINVAL; 4265 } 4266 } else { 4267 /* if this point is reached, this probably means a new option was 4268 * added without having it covered here */ 4269 abort(); 4270 } 4271 4272 desc++; 4273 } 4274 4275 helper_cb_info = (Qcow2AmendHelperCBInfo){ 4276 .original_status_cb = status_cb, 4277 .original_cb_opaque = cb_opaque, 4278 .total_operations = (new_version < old_version) 4279 + (s->refcount_bits != refcount_bits) 4280 }; 4281 4282 /* Upgrade first (some features may require compat=1.1) */ 4283 if (new_version > old_version) { 4284 s->qcow_version = new_version; 4285 ret = qcow2_update_header(bs); 4286 if (ret < 0) { 4287 s->qcow_version = old_version; 4288 return ret; 4289 } 4290 } 4291 4292 if (s->refcount_bits != refcount_bits) { 4293 int refcount_order = ctz32(refcount_bits); 4294 4295 if (new_version < 3 && refcount_bits != 16) { 4296 error_report("Different refcount widths than 16 bits require " 4297 "compatibility level 1.1 or above (use compat=1.1 or " 4298 "greater)"); 4299 return -EINVAL; 4300 } 4301 4302 helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER; 4303 ret = qcow2_change_refcount_order(bs, refcount_order, 4304 &qcow2_amend_helper_cb, 4305 &helper_cb_info, &local_err); 4306 if (ret < 0) { 4307 error_report_err(local_err); 4308 return ret; 4309 } 4310 } 4311 4312 if (backing_file || backing_format) { 4313 ret = qcow2_change_backing_file(bs, 4314 backing_file ?: s->image_backing_file, 4315 backing_format ?: s->image_backing_format); 4316 if (ret < 0) { 4317 return ret; 4318 } 4319 } 4320 4321 if (s->use_lazy_refcounts != lazy_refcounts) { 4322 if (lazy_refcounts) { 4323 if (new_version < 3) { 4324 error_report("Lazy refcounts only supported with compatibility " 4325 "level 1.1 and above (use compat=1.1 or greater)"); 4326 return -EINVAL; 4327 } 4328 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 4329 ret = qcow2_update_header(bs); 4330 if (ret < 0) { 4331 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 4332 return ret; 4333 } 4334 s->use_lazy_refcounts = true; 4335 } else { 4336 /* make image clean first */ 4337 ret = qcow2_mark_clean(bs); 4338 if (ret < 0) { 4339 return ret; 4340 } 4341 /* now disallow lazy refcounts */ 4342 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 4343 ret = qcow2_update_header(bs); 4344 if (ret < 0) { 4345 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 4346 return ret; 4347 } 4348 s->use_lazy_refcounts = false; 4349 } 4350 } 4351 4352 if (new_size) { 4353 BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); 4354 ret = blk_insert_bs(blk, bs, &local_err); 4355 if (ret < 0) { 4356 error_report_err(local_err); 4357 blk_unref(blk); 4358 return ret; 4359 } 4360 4361 ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, &local_err); 4362 blk_unref(blk); 4363 if (ret < 0) { 4364 error_report_err(local_err); 4365 return ret; 4366 } 4367 } 4368 4369 /* Downgrade last (so unsupported features can be removed before) */ 4370 if (new_version < old_version) { 4371 helper_cb_info.current_operation = QCOW2_DOWNGRADING; 4372 ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb, 4373 &helper_cb_info); 4374 if (ret < 0) { 4375 return ret; 4376 } 4377 } 4378 4379 return 0; 4380 } 4381 4382 /* 4383 * If offset or size are negative, respectively, they will not be included in 4384 * the BLOCK_IMAGE_CORRUPTED event emitted. 4385 * fatal will be ignored for read-only BDS; corruptions found there will always 4386 * be considered non-fatal. 4387 */ 4388 void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, 4389 int64_t size, const char *message_format, ...) 4390 { 4391 BDRVQcow2State *s = bs->opaque; 4392 const char *node_name; 4393 char *message; 4394 va_list ap; 4395 4396 fatal = fatal && !bs->read_only; 4397 4398 if (s->signaled_corruption && 4399 (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT))) 4400 { 4401 return; 4402 } 4403 4404 va_start(ap, message_format); 4405 message = g_strdup_vprintf(message_format, ap); 4406 va_end(ap); 4407 4408 if (fatal) { 4409 fprintf(stderr, "qcow2: Marking image as corrupt: %s; further " 4410 "corruption events will be suppressed\n", message); 4411 } else { 4412 fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal " 4413 "corruption events will be suppressed\n", message); 4414 } 4415 4416 node_name = bdrv_get_node_name(bs); 4417 qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), 4418 *node_name != '\0', node_name, 4419 message, offset >= 0, offset, 4420 size >= 0, size, 4421 fatal, &error_abort); 4422 g_free(message); 4423 4424 if (fatal) { 4425 qcow2_mark_corrupt(bs); 4426 bs->drv = NULL; /* make BDS unusable */ 4427 } 4428 4429 s->signaled_corruption = true; 4430 } 4431 4432 static QemuOptsList qcow2_create_opts = { 4433 .name = "qcow2-create-opts", 4434 .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head), 4435 .desc = { 4436 { 4437 .name = BLOCK_OPT_SIZE, 4438 .type = QEMU_OPT_SIZE, 4439 .help = "Virtual disk size" 4440 }, 4441 { 4442 .name = BLOCK_OPT_COMPAT_LEVEL, 4443 .type = QEMU_OPT_STRING, 4444 .help = "Compatibility level (0.10 or 1.1)" 4445 }, 4446 { 4447 .name = BLOCK_OPT_BACKING_FILE, 4448 .type = QEMU_OPT_STRING, 4449 .help = "File name of a base image" 4450 }, 4451 { 4452 .name = BLOCK_OPT_BACKING_FMT, 4453 .type = QEMU_OPT_STRING, 4454 .help = "Image format of the base image" 4455 }, 4456 { 4457 .name = BLOCK_OPT_ENCRYPT, 4458 .type = QEMU_OPT_BOOL, 4459 .help = "Encrypt the image with format 'aes'. (Deprecated " 4460 "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", 4461 }, 4462 { 4463 .name = BLOCK_OPT_ENCRYPT_FORMAT, 4464 .type = QEMU_OPT_STRING, 4465 .help = "Encrypt the image, format choices: 'aes', 'luks'", 4466 }, 4467 BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", 4468 "ID of secret providing qcow AES key or LUKS passphrase"), 4469 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."), 4470 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."), 4471 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."), 4472 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."), 4473 BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."), 4474 BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."), 4475 { 4476 .name = BLOCK_OPT_CLUSTER_SIZE, 4477 .type = QEMU_OPT_SIZE, 4478 .help = "qcow2 cluster size", 4479 .def_value_str = stringify(DEFAULT_CLUSTER_SIZE) 4480 }, 4481 { 4482 .name = BLOCK_OPT_PREALLOC, 4483 .type = QEMU_OPT_STRING, 4484 .help = "Preallocation mode (allowed values: off, metadata, " 4485 "falloc, full)" 4486 }, 4487 { 4488 .name = BLOCK_OPT_LAZY_REFCOUNTS, 4489 .type = QEMU_OPT_BOOL, 4490 .help = "Postpone refcount updates", 4491 .def_value_str = "off" 4492 }, 4493 { 4494 .name = BLOCK_OPT_REFCOUNT_BITS, 4495 .type = QEMU_OPT_NUMBER, 4496 .help = "Width of a reference count entry in bits", 4497 .def_value_str = "16" 4498 }, 4499 { /* end of list */ } 4500 } 4501 }; 4502 4503 BlockDriver bdrv_qcow2 = { 4504 .format_name = "qcow2", 4505 .instance_size = sizeof(BDRVQcow2State), 4506 .bdrv_probe = qcow2_probe, 4507 .bdrv_open = qcow2_open, 4508 .bdrv_close = qcow2_close, 4509 .bdrv_reopen_prepare = qcow2_reopen_prepare, 4510 .bdrv_reopen_commit = qcow2_reopen_commit, 4511 .bdrv_reopen_abort = qcow2_reopen_abort, 4512 .bdrv_join_options = qcow2_join_options, 4513 .bdrv_child_perm = bdrv_format_default_perms, 4514 .bdrv_co_create_opts = qcow2_co_create_opts, 4515 .bdrv_co_create = qcow2_co_create, 4516 .bdrv_has_zero_init = bdrv_has_zero_init_1, 4517 .bdrv_co_block_status = qcow2_co_block_status, 4518 4519 .bdrv_co_preadv = qcow2_co_preadv, 4520 .bdrv_co_pwritev = qcow2_co_pwritev, 4521 .bdrv_co_flush_to_os = qcow2_co_flush_to_os, 4522 4523 .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, 4524 .bdrv_co_pdiscard = qcow2_co_pdiscard, 4525 .bdrv_truncate = qcow2_truncate, 4526 .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed, 4527 .bdrv_make_empty = qcow2_make_empty, 4528 4529 .bdrv_snapshot_create = qcow2_snapshot_create, 4530 .bdrv_snapshot_goto = qcow2_snapshot_goto, 4531 .bdrv_snapshot_delete = qcow2_snapshot_delete, 4532 .bdrv_snapshot_list = qcow2_snapshot_list, 4533 .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, 4534 .bdrv_measure = qcow2_measure, 4535 .bdrv_get_info = qcow2_get_info, 4536 .bdrv_get_specific_info = qcow2_get_specific_info, 4537 4538 .bdrv_save_vmstate = qcow2_save_vmstate, 4539 .bdrv_load_vmstate = qcow2_load_vmstate, 4540 4541 .supports_backing = true, 4542 .bdrv_change_backing_file = qcow2_change_backing_file, 4543 4544 .bdrv_refresh_limits = qcow2_refresh_limits, 4545 .bdrv_co_invalidate_cache = qcow2_co_invalidate_cache, 4546 .bdrv_inactivate = qcow2_inactivate, 4547 4548 .create_opts = &qcow2_create_opts, 4549 .bdrv_co_check = qcow2_co_check, 4550 .bdrv_amend_options = qcow2_amend_options, 4551 4552 .bdrv_detach_aio_context = qcow2_detach_aio_context, 4553 .bdrv_attach_aio_context = qcow2_attach_aio_context, 4554 4555 .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw, 4556 .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap, 4557 .bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap, 4558 }; 4559 4560 static void bdrv_qcow2_init(void) 4561 { 4562 bdrv_register(&bdrv_qcow2); 4563 } 4564 4565 block_init(bdrv_qcow2_init); 4566