1 /* 2 * Block driver for the QCOW version 2 format 3 * 4 * Copyright (c) 2004-2006 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "qemu/osdep.h" 26 27 #define ZLIB_CONST 28 #include <zlib.h> 29 30 #include "block/block_int.h" 31 #include "block/qdict.h" 32 #include "sysemu/block-backend.h" 33 #include "qemu/module.h" 34 #include "qcow2.h" 35 #include "qemu/error-report.h" 36 #include "qapi/error.h" 37 #include "qapi/qapi-events-block-core.h" 38 #include "qapi/qmp/qdict.h" 39 #include "qapi/qmp/qstring.h" 40 #include "trace.h" 41 #include "qemu/option_int.h" 42 #include "qemu/cutils.h" 43 #include "qemu/bswap.h" 44 #include "qapi/qobject-input-visitor.h" 45 #include "qapi/qapi-visit-block-core.h" 46 #include "crypto.h" 47 #include "block/thread-pool.h" 48 49 /* 50 Differences with QCOW: 51 52 - Support for multiple incremental snapshots. 53 - Memory management by reference counts. 
54 - Clusters which have a reference count of one have the bit 55 QCOW_OFLAG_COPIED to optimize write performance. 56 - Size of compressed clusters is stored in sectors to reduce bit usage 57 in the cluster offsets. 58 - Support for storing additional data (such as the VM state) in the 59 snapshots. 60 - If a backing store is used, the cluster size is not constrained 61 (could be backported to QCOW). 62 - L2 tables have always a size of one cluster. 63 */ 64 65 66 typedef struct { 67 uint32_t magic; 68 uint32_t len; 69 } QEMU_PACKED QCowExtension; 70 71 #define QCOW2_EXT_MAGIC_END 0 72 #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA 73 #define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 74 #define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77 75 #define QCOW2_EXT_MAGIC_BITMAPS 0x23852875 76 #define QCOW2_EXT_MAGIC_DATA_FILE 0x44415441 77 78 static int coroutine_fn 79 qcow2_co_preadv_compressed(BlockDriverState *bs, 80 uint64_t file_cluster_offset, 81 uint64_t offset, 82 uint64_t bytes, 83 QEMUIOVector *qiov); 84 85 static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) 86 { 87 const QCowHeader *cow_header = (const void *)buf; 88 89 if (buf_size >= sizeof(QCowHeader) && 90 be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 91 be32_to_cpu(cow_header->version) >= 2) 92 return 100; 93 else 94 return 0; 95 } 96 97 98 static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset, 99 uint8_t *buf, size_t buflen, 100 void *opaque, Error **errp) 101 { 102 BlockDriverState *bs = opaque; 103 BDRVQcow2State *s = bs->opaque; 104 ssize_t ret; 105 106 if ((offset + buflen) > s->crypto_header.length) { 107 error_setg(errp, "Request for data outside of extension header"); 108 return -1; 109 } 110 111 ret = bdrv_pread(bs->file, 112 s->crypto_header.offset + offset, buf, buflen); 113 if (ret < 0) { 114 error_setg_errno(errp, -ret, "Could not read encryption header"); 115 return -1; 116 } 117 return ret; 118 } 119 120 121 static ssize_t 
qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, 122 void *opaque, Error **errp) 123 { 124 BlockDriverState *bs = opaque; 125 BDRVQcow2State *s = bs->opaque; 126 int64_t ret; 127 int64_t clusterlen; 128 129 ret = qcow2_alloc_clusters(bs, headerlen); 130 if (ret < 0) { 131 error_setg_errno(errp, -ret, 132 "Cannot allocate cluster for LUKS header size %zu", 133 headerlen); 134 return -1; 135 } 136 137 s->crypto_header.length = headerlen; 138 s->crypto_header.offset = ret; 139 140 /* Zero fill remaining space in cluster so it has predictable 141 * content in case of future spec changes */ 142 clusterlen = size_to_clusters(s, headerlen) * s->cluster_size; 143 assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0); 144 ret = bdrv_pwrite_zeroes(bs->file, 145 ret + headerlen, 146 clusterlen - headerlen, 0); 147 if (ret < 0) { 148 error_setg_errno(errp, -ret, "Could not zero fill encryption header"); 149 return -1; 150 } 151 152 return ret; 153 } 154 155 156 static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset, 157 const uint8_t *buf, size_t buflen, 158 void *opaque, Error **errp) 159 { 160 BlockDriverState *bs = opaque; 161 BDRVQcow2State *s = bs->opaque; 162 ssize_t ret; 163 164 if ((offset + buflen) > s->crypto_header.length) { 165 error_setg(errp, "Request for data outside of extension header"); 166 return -1; 167 } 168 169 ret = bdrv_pwrite(bs->file, 170 s->crypto_header.offset + offset, buf, buflen); 171 if (ret < 0) { 172 error_setg_errno(errp, -ret, "Could not read encryption header"); 173 return -1; 174 } 175 return ret; 176 } 177 178 179 /* 180 * read qcow2 extension and fill bs 181 * start reading from start_offset 182 * finish reading upon magic of value 0 or when end_offset reached 183 * unknown magic is skipped (future extension this version knows nothing about) 184 * return 0 upon success, non-0 otherwise 185 */ 186 static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, 187 
uint64_t end_offset, void **p_feature_table, 188 int flags, bool *need_update_header, 189 Error **errp) 190 { 191 BDRVQcow2State *s = bs->opaque; 192 QCowExtension ext; 193 uint64_t offset; 194 int ret; 195 Qcow2BitmapHeaderExt bitmaps_ext; 196 197 if (need_update_header != NULL) { 198 *need_update_header = false; 199 } 200 201 #ifdef DEBUG_EXT 202 printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); 203 #endif 204 offset = start_offset; 205 while (offset < end_offset) { 206 207 #ifdef DEBUG_EXT 208 /* Sanity check */ 209 if (offset > s->cluster_size) 210 printf("qcow2_read_extension: suspicious offset %lu\n", offset); 211 212 printf("attempting to read extended header in offset %lu\n", offset); 213 #endif 214 215 ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext)); 216 if (ret < 0) { 217 error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: " 218 "pread fail from offset %" PRIu64, offset); 219 return 1; 220 } 221 ext.magic = be32_to_cpu(ext.magic); 222 ext.len = be32_to_cpu(ext.len); 223 offset += sizeof(ext); 224 #ifdef DEBUG_EXT 225 printf("ext.magic = 0x%x\n", ext.magic); 226 #endif 227 if (offset > end_offset || ext.len > end_offset - offset) { 228 error_setg(errp, "Header extension too large"); 229 return -EINVAL; 230 } 231 232 switch (ext.magic) { 233 case QCOW2_EXT_MAGIC_END: 234 return 0; 235 236 case QCOW2_EXT_MAGIC_BACKING_FORMAT: 237 if (ext.len >= sizeof(bs->backing_format)) { 238 error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32 239 " too large (>=%zu)", ext.len, 240 sizeof(bs->backing_format)); 241 return 2; 242 } 243 ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len); 244 if (ret < 0) { 245 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: " 246 "Could not read format name"); 247 return 3; 248 } 249 bs->backing_format[ext.len] = '\0'; 250 s->image_backing_format = g_strdup(bs->backing_format); 251 #ifdef DEBUG_EXT 252 printf("Qcow2: Got format extension %s\n", bs->backing_format); 253 
#endif 254 break; 255 256 case QCOW2_EXT_MAGIC_FEATURE_TABLE: 257 if (p_feature_table != NULL) { 258 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); 259 ret = bdrv_pread(bs->file, offset , feature_table, ext.len); 260 if (ret < 0) { 261 error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " 262 "Could not read table"); 263 return ret; 264 } 265 266 *p_feature_table = feature_table; 267 } 268 break; 269 270 case QCOW2_EXT_MAGIC_CRYPTO_HEADER: { 271 unsigned int cflags = 0; 272 if (s->crypt_method_header != QCOW_CRYPT_LUKS) { 273 error_setg(errp, "CRYPTO header extension only " 274 "expected with LUKS encryption method"); 275 return -EINVAL; 276 } 277 if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) { 278 error_setg(errp, "CRYPTO header extension size %u, " 279 "but expected size %zu", ext.len, 280 sizeof(Qcow2CryptoHeaderExtension)); 281 return -EINVAL; 282 } 283 284 ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len); 285 if (ret < 0) { 286 error_setg_errno(errp, -ret, 287 "Unable to read CRYPTO header extension"); 288 return ret; 289 } 290 s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset); 291 s->crypto_header.length = be64_to_cpu(s->crypto_header.length); 292 293 if ((s->crypto_header.offset % s->cluster_size) != 0) { 294 error_setg(errp, "Encryption header offset '%" PRIu64 "' is " 295 "not a multiple of cluster size '%u'", 296 s->crypto_header.offset, s->cluster_size); 297 return -EINVAL; 298 } 299 300 if (flags & BDRV_O_NO_IO) { 301 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; 302 } 303 s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", 304 qcow2_crypto_hdr_read_func, 305 bs, cflags, 1, errp); 306 if (!s->crypto) { 307 return -EINVAL; 308 } 309 } break; 310 311 case QCOW2_EXT_MAGIC_BITMAPS: 312 if (ext.len != sizeof(bitmaps_ext)) { 313 error_setg_errno(errp, -ret, "bitmaps_ext: " 314 "Invalid extension length"); 315 return -EINVAL; 316 } 317 318 if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) { 319 if 
(s->qcow_version < 3) { 320 /* Let's be a bit more specific */ 321 warn_report("This qcow2 v2 image contains bitmaps, but " 322 "they may have been modified by a program " 323 "without persistent bitmap support; so now " 324 "they must all be considered inconsistent"); 325 } else { 326 warn_report("a program lacking bitmap support " 327 "modified this file, so all bitmaps are now " 328 "considered inconsistent"); 329 } 330 error_printf("Some clusters may be leaked, " 331 "run 'qemu-img check -r' on the image " 332 "file to fix."); 333 if (need_update_header != NULL) { 334 /* Updating is needed to drop invalid bitmap extension. */ 335 *need_update_header = true; 336 } 337 break; 338 } 339 340 ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len); 341 if (ret < 0) { 342 error_setg_errno(errp, -ret, "bitmaps_ext: " 343 "Could not read ext header"); 344 return ret; 345 } 346 347 if (bitmaps_ext.reserved32 != 0) { 348 error_setg_errno(errp, -ret, "bitmaps_ext: " 349 "Reserved field is not zero"); 350 return -EINVAL; 351 } 352 353 bitmaps_ext.nb_bitmaps = be32_to_cpu(bitmaps_ext.nb_bitmaps); 354 bitmaps_ext.bitmap_directory_size = 355 be64_to_cpu(bitmaps_ext.bitmap_directory_size); 356 bitmaps_ext.bitmap_directory_offset = 357 be64_to_cpu(bitmaps_ext.bitmap_directory_offset); 358 359 if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) { 360 error_setg(errp, 361 "bitmaps_ext: Image has %" PRIu32 " bitmaps, " 362 "exceeding the QEMU supported maximum of %d", 363 bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS); 364 return -EINVAL; 365 } 366 367 if (bitmaps_ext.nb_bitmaps == 0) { 368 error_setg(errp, "found bitmaps extension with zero bitmaps"); 369 return -EINVAL; 370 } 371 372 if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) { 373 error_setg(errp, "bitmaps_ext: " 374 "invalid bitmap directory offset"); 375 return -EINVAL; 376 } 377 378 if (bitmaps_ext.bitmap_directory_size > 379 QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { 380 error_setg(errp, "bitmaps_ext: " 381 "bitmap 
directory size (%" PRIu64 ") exceeds " 382 "the maximum supported size (%d)", 383 bitmaps_ext.bitmap_directory_size, 384 QCOW2_MAX_BITMAP_DIRECTORY_SIZE); 385 return -EINVAL; 386 } 387 388 s->nb_bitmaps = bitmaps_ext.nb_bitmaps; 389 s->bitmap_directory_offset = 390 bitmaps_ext.bitmap_directory_offset; 391 s->bitmap_directory_size = 392 bitmaps_ext.bitmap_directory_size; 393 394 #ifdef DEBUG_EXT 395 printf("Qcow2: Got bitmaps extension: " 396 "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n", 397 s->bitmap_directory_offset, s->nb_bitmaps); 398 #endif 399 break; 400 401 case QCOW2_EXT_MAGIC_DATA_FILE: 402 { 403 s->image_data_file = g_malloc0(ext.len + 1); 404 ret = bdrv_pread(bs->file, offset, s->image_data_file, ext.len); 405 if (ret < 0) { 406 error_setg_errno(errp, -ret, 407 "ERROR: Could not read data file name"); 408 return ret; 409 } 410 #ifdef DEBUG_EXT 411 printf("Qcow2: Got external data file %s\n", s->image_data_file); 412 #endif 413 break; 414 } 415 416 default: 417 /* unknown magic - save it in case we need to rewrite the header */ 418 /* If you add a new feature, make sure to also update the fast 419 * path of qcow2_make_empty() to deal with it. 
*/ 420 { 421 Qcow2UnknownHeaderExtension *uext; 422 423 uext = g_malloc0(sizeof(*uext) + ext.len); 424 uext->magic = ext.magic; 425 uext->len = ext.len; 426 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); 427 428 ret = bdrv_pread(bs->file, offset , uext->data, uext->len); 429 if (ret < 0) { 430 error_setg_errno(errp, -ret, "ERROR: unknown extension: " 431 "Could not read data"); 432 return ret; 433 } 434 } 435 break; 436 } 437 438 offset += ((ext.len + 7) & ~7); 439 } 440 441 return 0; 442 } 443 444 static void cleanup_unknown_header_ext(BlockDriverState *bs) 445 { 446 BDRVQcow2State *s = bs->opaque; 447 Qcow2UnknownHeaderExtension *uext, *next; 448 449 QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { 450 QLIST_REMOVE(uext, next); 451 g_free(uext); 452 } 453 } 454 455 static void report_unsupported_feature(Error **errp, Qcow2Feature *table, 456 uint64_t mask) 457 { 458 char *features = g_strdup(""); 459 char *old; 460 461 while (table && table->name[0] != '\0') { 462 if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) { 463 if (mask & (1ULL << table->bit)) { 464 old = features; 465 features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "", 466 table->name); 467 g_free(old); 468 mask &= ~(1ULL << table->bit); 469 } 470 } 471 table++; 472 } 473 474 if (mask) { 475 old = features; 476 features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64, 477 old, *old ? ", " : "", mask); 478 g_free(old); 479 } 480 481 error_setg(errp, "Unsupported qcow2 feature(s): %s", features); 482 g_free(features); 483 } 484 485 /* 486 * Sets the dirty bit and flushes afterwards if necessary. 487 * 488 * The incompatible_features bit is only set if the image file header was 489 * updated successfully. Therefore it is not required to check the return 490 * value of this function. 
491 */ 492 int qcow2_mark_dirty(BlockDriverState *bs) 493 { 494 BDRVQcow2State *s = bs->opaque; 495 uint64_t val; 496 int ret; 497 498 assert(s->qcow_version >= 3); 499 500 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 501 return 0; /* already dirty */ 502 } 503 504 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY); 505 ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features), 506 &val, sizeof(val)); 507 if (ret < 0) { 508 return ret; 509 } 510 ret = bdrv_flush(bs->file->bs); 511 if (ret < 0) { 512 return ret; 513 } 514 515 /* Only treat image as dirty if the header was updated successfully */ 516 s->incompatible_features |= QCOW2_INCOMPAT_DIRTY; 517 return 0; 518 } 519 520 /* 521 * Clears the dirty bit and flushes before if necessary. Only call this 522 * function when there are no pending requests, it does not guard against 523 * concurrent requests dirtying the image. 524 */ 525 static int qcow2_mark_clean(BlockDriverState *bs) 526 { 527 BDRVQcow2State *s = bs->opaque; 528 529 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 530 int ret; 531 532 s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY; 533 534 ret = qcow2_flush_caches(bs); 535 if (ret < 0) { 536 return ret; 537 } 538 539 return qcow2_update_header(bs); 540 } 541 return 0; 542 } 543 544 /* 545 * Marks the image as corrupt. 546 */ 547 int qcow2_mark_corrupt(BlockDriverState *bs) 548 { 549 BDRVQcow2State *s = bs->opaque; 550 551 s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT; 552 return qcow2_update_header(bs); 553 } 554 555 /* 556 * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes 557 * before if necessary. 
558 */ 559 int qcow2_mark_consistent(BlockDriverState *bs) 560 { 561 BDRVQcow2State *s = bs->opaque; 562 563 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 564 int ret = qcow2_flush_caches(bs); 565 if (ret < 0) { 566 return ret; 567 } 568 569 s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT; 570 return qcow2_update_header(bs); 571 } 572 return 0; 573 } 574 575 static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs, 576 BdrvCheckResult *result, 577 BdrvCheckMode fix) 578 { 579 int ret = qcow2_check_refcounts(bs, result, fix); 580 if (ret < 0) { 581 return ret; 582 } 583 584 if (fix && result->check_errors == 0 && result->corruptions == 0) { 585 ret = qcow2_mark_clean(bs); 586 if (ret < 0) { 587 return ret; 588 } 589 return qcow2_mark_consistent(bs); 590 } 591 return ret; 592 } 593 594 static int coroutine_fn qcow2_co_check(BlockDriverState *bs, 595 BdrvCheckResult *result, 596 BdrvCheckMode fix) 597 { 598 BDRVQcow2State *s = bs->opaque; 599 int ret; 600 601 qemu_co_mutex_lock(&s->lock); 602 ret = qcow2_co_check_locked(bs, result, fix); 603 qemu_co_mutex_unlock(&s->lock); 604 return ret; 605 } 606 607 int qcow2_validate_table(BlockDriverState *bs, uint64_t offset, 608 uint64_t entries, size_t entry_len, 609 int64_t max_size_bytes, const char *table_name, 610 Error **errp) 611 { 612 BDRVQcow2State *s = bs->opaque; 613 614 if (entries > max_size_bytes / entry_len) { 615 error_setg(errp, "%s too large", table_name); 616 return -EFBIG; 617 } 618 619 /* Use signed INT64_MAX as the maximum even for uint64_t header fields, 620 * because values will be passed to qemu functions taking int64_t. 
*/ 621 if ((INT64_MAX - entries * entry_len < offset) || 622 (offset_into_cluster(s, offset) != 0)) { 623 error_setg(errp, "%s offset invalid", table_name); 624 return -EINVAL; 625 } 626 627 return 0; 628 } 629 630 static QemuOptsList qcow2_runtime_opts = { 631 .name = "qcow2", 632 .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head), 633 .desc = { 634 { 635 .name = QCOW2_OPT_LAZY_REFCOUNTS, 636 .type = QEMU_OPT_BOOL, 637 .help = "Postpone refcount updates", 638 }, 639 { 640 .name = QCOW2_OPT_DISCARD_REQUEST, 641 .type = QEMU_OPT_BOOL, 642 .help = "Pass guest discard requests to the layer below", 643 }, 644 { 645 .name = QCOW2_OPT_DISCARD_SNAPSHOT, 646 .type = QEMU_OPT_BOOL, 647 .help = "Generate discard requests when snapshot related space " 648 "is freed", 649 }, 650 { 651 .name = QCOW2_OPT_DISCARD_OTHER, 652 .type = QEMU_OPT_BOOL, 653 .help = "Generate discard requests when other clusters are freed", 654 }, 655 { 656 .name = QCOW2_OPT_OVERLAP, 657 .type = QEMU_OPT_STRING, 658 .help = "Selects which overlap checks to perform from a range of " 659 "templates (none, constant, cached, all)", 660 }, 661 { 662 .name = QCOW2_OPT_OVERLAP_TEMPLATE, 663 .type = QEMU_OPT_STRING, 664 .help = "Selects which overlap checks to perform from a range of " 665 "templates (none, constant, cached, all)", 666 }, 667 { 668 .name = QCOW2_OPT_OVERLAP_MAIN_HEADER, 669 .type = QEMU_OPT_BOOL, 670 .help = "Check for unintended writes into the main qcow2 header", 671 }, 672 { 673 .name = QCOW2_OPT_OVERLAP_ACTIVE_L1, 674 .type = QEMU_OPT_BOOL, 675 .help = "Check for unintended writes into the active L1 table", 676 }, 677 { 678 .name = QCOW2_OPT_OVERLAP_ACTIVE_L2, 679 .type = QEMU_OPT_BOOL, 680 .help = "Check for unintended writes into an active L2 table", 681 }, 682 { 683 .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 684 .type = QEMU_OPT_BOOL, 685 .help = "Check for unintended writes into the refcount table", 686 }, 687 { 688 .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, 689 .type = 
QEMU_OPT_BOOL, 690 .help = "Check for unintended writes into a refcount block", 691 }, 692 { 693 .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, 694 .type = QEMU_OPT_BOOL, 695 .help = "Check for unintended writes into the snapshot table", 696 }, 697 { 698 .name = QCOW2_OPT_OVERLAP_INACTIVE_L1, 699 .type = QEMU_OPT_BOOL, 700 .help = "Check for unintended writes into an inactive L1 table", 701 }, 702 { 703 .name = QCOW2_OPT_OVERLAP_INACTIVE_L2, 704 .type = QEMU_OPT_BOOL, 705 .help = "Check for unintended writes into an inactive L2 table", 706 }, 707 { 708 .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY, 709 .type = QEMU_OPT_BOOL, 710 .help = "Check for unintended writes into the bitmap directory", 711 }, 712 { 713 .name = QCOW2_OPT_CACHE_SIZE, 714 .type = QEMU_OPT_SIZE, 715 .help = "Maximum combined metadata (L2 tables and refcount blocks) " 716 "cache size", 717 }, 718 { 719 .name = QCOW2_OPT_L2_CACHE_SIZE, 720 .type = QEMU_OPT_SIZE, 721 .help = "Maximum L2 table cache size", 722 }, 723 { 724 .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE, 725 .type = QEMU_OPT_SIZE, 726 .help = "Size of each entry in the L2 cache", 727 }, 728 { 729 .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE, 730 .type = QEMU_OPT_SIZE, 731 .help = "Maximum refcount block cache size", 732 }, 733 { 734 .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL, 735 .type = QEMU_OPT_NUMBER, 736 .help = "Clean unused cache entries after this time (in seconds)", 737 }, 738 BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", 739 "ID of secret providing qcow2 AES key or LUKS passphrase"), 740 { /* end of list */ } 741 }, 742 }; 743 744 static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { 745 [QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER, 746 [QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1, 747 [QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2, 748 [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 749 [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, 750 
[QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, 751 [QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1, 752 [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, 753 [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY, 754 }; 755 756 static void cache_clean_timer_cb(void *opaque) 757 { 758 BlockDriverState *bs = opaque; 759 BDRVQcow2State *s = bs->opaque; 760 qcow2_cache_clean_unused(s->l2_table_cache); 761 qcow2_cache_clean_unused(s->refcount_block_cache); 762 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 763 (int64_t) s->cache_clean_interval * 1000); 764 } 765 766 static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) 767 { 768 BDRVQcow2State *s = bs->opaque; 769 if (s->cache_clean_interval > 0) { 770 s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL, 771 SCALE_MS, cache_clean_timer_cb, 772 bs); 773 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 774 (int64_t) s->cache_clean_interval * 1000); 775 } 776 } 777 778 static void cache_clean_timer_del(BlockDriverState *bs) 779 { 780 BDRVQcow2State *s = bs->opaque; 781 if (s->cache_clean_timer) { 782 timer_del(s->cache_clean_timer); 783 timer_free(s->cache_clean_timer); 784 s->cache_clean_timer = NULL; 785 } 786 } 787 788 static void qcow2_detach_aio_context(BlockDriverState *bs) 789 { 790 cache_clean_timer_del(bs); 791 } 792 793 static void qcow2_attach_aio_context(BlockDriverState *bs, 794 AioContext *new_context) 795 { 796 cache_clean_timer_init(bs, new_context); 797 } 798 799 static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, 800 uint64_t *l2_cache_size, 801 uint64_t *l2_cache_entry_size, 802 uint64_t *refcount_cache_size, Error **errp) 803 { 804 BDRVQcow2State *s = bs->opaque; 805 uint64_t combined_cache_size, l2_cache_max_setting; 806 bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; 807 bool l2_cache_entry_size_set; 808 int 
min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; 809 uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; 810 uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); 811 812 combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); 813 l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE); 814 refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 815 l2_cache_entry_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE); 816 817 combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0); 818 l2_cache_max_setting = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 819 DEFAULT_L2_CACHE_MAX_SIZE); 820 *refcount_cache_size = qemu_opt_get_size(opts, 821 QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0); 822 823 *l2_cache_entry_size = qemu_opt_get_size( 824 opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size); 825 826 *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting); 827 828 if (combined_cache_size_set) { 829 if (l2_cache_size_set && refcount_cache_size_set) { 830 error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE 831 " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set " 832 "at the same time"); 833 return; 834 } else if (l2_cache_size_set && 835 (l2_cache_max_setting > combined_cache_size)) { 836 error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed " 837 QCOW2_OPT_CACHE_SIZE); 838 return; 839 } else if (*refcount_cache_size > combined_cache_size) { 840 error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed " 841 QCOW2_OPT_CACHE_SIZE); 842 return; 843 } 844 845 if (l2_cache_size_set) { 846 *refcount_cache_size = combined_cache_size - *l2_cache_size; 847 } else if (refcount_cache_size_set) { 848 *l2_cache_size = combined_cache_size - *refcount_cache_size; 849 } else { 850 /* Assign as much memory as possible to the L2 cache, and 851 * use the remainder for the refcount cache */ 852 if (combined_cache_size >= max_l2_cache + min_refcount_cache) { 853 
*l2_cache_size = max_l2_cache; 854 *refcount_cache_size = combined_cache_size - *l2_cache_size; 855 } else { 856 *refcount_cache_size = 857 MIN(combined_cache_size, min_refcount_cache); 858 *l2_cache_size = combined_cache_size - *refcount_cache_size; 859 } 860 } 861 } 862 863 /* 864 * If the L2 cache is not enough to cover the whole disk then 865 * default to 4KB entries. Smaller entries reduce the cost of 866 * loads and evictions and increase I/O performance. 867 */ 868 if (*l2_cache_size < max_l2_cache && !l2_cache_entry_size_set) { 869 *l2_cache_entry_size = MIN(s->cluster_size, 4096); 870 } 871 872 /* l2_cache_size and refcount_cache_size are ensured to have at least 873 * their minimum values in qcow2_update_options_prepare() */ 874 875 if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) || 876 *l2_cache_entry_size > s->cluster_size || 877 !is_power_of_2(*l2_cache_entry_size)) { 878 error_setg(errp, "L2 cache entry size must be a power of two " 879 "between %d and the cluster size (%d)", 880 1 << MIN_CLUSTER_BITS, s->cluster_size); 881 return; 882 } 883 } 884 885 typedef struct Qcow2ReopenState { 886 Qcow2Cache *l2_table_cache; 887 Qcow2Cache *refcount_block_cache; 888 int l2_slice_size; /* Number of entries in a slice of the L2 table */ 889 bool use_lazy_refcounts; 890 int overlap_check; 891 bool discard_passthrough[QCOW2_DISCARD_MAX]; 892 uint64_t cache_clean_interval; 893 QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */ 894 } Qcow2ReopenState; 895 896 static int qcow2_update_options_prepare(BlockDriverState *bs, 897 Qcow2ReopenState *r, 898 QDict *options, int flags, 899 Error **errp) 900 { 901 BDRVQcow2State *s = bs->opaque; 902 QemuOpts *opts = NULL; 903 const char *opt_overlap_check, *opt_overlap_check_template; 904 int overlap_check_template = 0; 905 uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size; 906 int i; 907 const char *encryptfmt; 908 QDict *encryptopts = NULL; 909 Error *local_err = NULL; 910 int ret; 
911 912 qdict_extract_subqdict(options, &encryptopts, "encrypt."); 913 encryptfmt = qdict_get_try_str(encryptopts, "format"); 914 915 opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort); 916 qemu_opts_absorb_qdict(opts, options, &local_err); 917 if (local_err) { 918 error_propagate(errp, local_err); 919 ret = -EINVAL; 920 goto fail; 921 } 922 923 /* get L2 table/refcount block cache size from command line options */ 924 read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size, 925 &refcount_cache_size, &local_err); 926 if (local_err) { 927 error_propagate(errp, local_err); 928 ret = -EINVAL; 929 goto fail; 930 } 931 932 l2_cache_size /= l2_cache_entry_size; 933 if (l2_cache_size < MIN_L2_CACHE_SIZE) { 934 l2_cache_size = MIN_L2_CACHE_SIZE; 935 } 936 if (l2_cache_size > INT_MAX) { 937 error_setg(errp, "L2 cache size too big"); 938 ret = -EINVAL; 939 goto fail; 940 } 941 942 refcount_cache_size /= s->cluster_size; 943 if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) { 944 refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE; 945 } 946 if (refcount_cache_size > INT_MAX) { 947 error_setg(errp, "Refcount cache size too big"); 948 ret = -EINVAL; 949 goto fail; 950 } 951 952 /* alloc new L2 table/refcount block cache, flush old one */ 953 if (s->l2_table_cache) { 954 ret = qcow2_cache_flush(bs, s->l2_table_cache); 955 if (ret) { 956 error_setg_errno(errp, -ret, "Failed to flush the L2 table cache"); 957 goto fail; 958 } 959 } 960 961 if (s->refcount_block_cache) { 962 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 963 if (ret) { 964 error_setg_errno(errp, -ret, 965 "Failed to flush the refcount block cache"); 966 goto fail; 967 } 968 } 969 970 r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t); 971 r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size, 972 l2_cache_entry_size); 973 r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size, 974 s->cluster_size); 975 if (r->l2_table_cache == NULL || r->refcount_block_cache == 
NULL) { 976 error_setg(errp, "Could not allocate metadata caches"); 977 ret = -ENOMEM; 978 goto fail; 979 } 980 981 /* New interval for cache cleanup timer */ 982 r->cache_clean_interval = 983 qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 984 DEFAULT_CACHE_CLEAN_INTERVAL); 985 #ifndef CONFIG_LINUX 986 if (r->cache_clean_interval != 0) { 987 error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL 988 " not supported on this host"); 989 ret = -EINVAL; 990 goto fail; 991 } 992 #endif 993 if (r->cache_clean_interval > UINT_MAX) { 994 error_setg(errp, "Cache clean interval too big"); 995 ret = -EINVAL; 996 goto fail; 997 } 998 999 /* lazy-refcounts; flush if going from enabled to disabled */ 1000 r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS, 1001 (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)); 1002 if (r->use_lazy_refcounts && s->qcow_version < 3) { 1003 error_setg(errp, "Lazy refcounts require a qcow2 image with at least " 1004 "qemu 1.1 compatibility level"); 1005 ret = -EINVAL; 1006 goto fail; 1007 } 1008 1009 if (s->use_lazy_refcounts && !r->use_lazy_refcounts) { 1010 ret = qcow2_mark_clean(bs); 1011 if (ret < 0) { 1012 error_setg_errno(errp, -ret, "Failed to disable lazy refcounts"); 1013 goto fail; 1014 } 1015 } 1016 1017 /* Overlap check options */ 1018 opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP); 1019 opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE); 1020 if (opt_overlap_check_template && opt_overlap_check && 1021 strcmp(opt_overlap_check_template, opt_overlap_check)) 1022 { 1023 error_setg(errp, "Conflicting values for qcow2 options '" 1024 QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE 1025 "' ('%s')", opt_overlap_check, opt_overlap_check_template); 1026 ret = -EINVAL; 1027 goto fail; 1028 } 1029 if (!opt_overlap_check) { 1030 opt_overlap_check = opt_overlap_check_template ?: "cached"; 1031 } 1032 1033 if (!strcmp(opt_overlap_check, "none")) { 1034 
overlap_check_template = 0; 1035 } else if (!strcmp(opt_overlap_check, "constant")) { 1036 overlap_check_template = QCOW2_OL_CONSTANT; 1037 } else if (!strcmp(opt_overlap_check, "cached")) { 1038 overlap_check_template = QCOW2_OL_CACHED; 1039 } else if (!strcmp(opt_overlap_check, "all")) { 1040 overlap_check_template = QCOW2_OL_ALL; 1041 } else { 1042 error_setg(errp, "Unsupported value '%s' for qcow2 option " 1043 "'overlap-check'. Allowed are any of the following: " 1044 "none, constant, cached, all", opt_overlap_check); 1045 ret = -EINVAL; 1046 goto fail; 1047 } 1048 1049 r->overlap_check = 0; 1050 for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) { 1051 /* overlap-check defines a template bitmask, but every flag may be 1052 * overwritten through the associated boolean option */ 1053 r->overlap_check |= 1054 qemu_opt_get_bool(opts, overlap_bool_option_names[i], 1055 overlap_check_template & (1 << i)) << i; 1056 } 1057 1058 r->discard_passthrough[QCOW2_DISCARD_NEVER] = false; 1059 r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true; 1060 r->discard_passthrough[QCOW2_DISCARD_REQUEST] = 1061 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST, 1062 flags & BDRV_O_UNMAP); 1063 r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] = 1064 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true); 1065 r->discard_passthrough[QCOW2_DISCARD_OTHER] = 1066 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); 1067 1068 switch (s->crypt_method_header) { 1069 case QCOW_CRYPT_NONE: 1070 if (encryptfmt) { 1071 error_setg(errp, "No encryption in image header, but options " 1072 "specified format '%s'", encryptfmt); 1073 ret = -EINVAL; 1074 goto fail; 1075 } 1076 break; 1077 1078 case QCOW_CRYPT_AES: 1079 if (encryptfmt && !g_str_equal(encryptfmt, "aes")) { 1080 error_setg(errp, 1081 "Header reported 'aes' encryption format but " 1082 "options specify '%s'", encryptfmt); 1083 ret = -EINVAL; 1084 goto fail; 1085 } 1086 qdict_put_str(encryptopts, "format", "qcow"); 1087 r->crypto_opts = 
block_crypto_open_opts_init(encryptopts, errp); 1088 break; 1089 1090 case QCOW_CRYPT_LUKS: 1091 if (encryptfmt && !g_str_equal(encryptfmt, "luks")) { 1092 error_setg(errp, 1093 "Header reported 'luks' encryption format but " 1094 "options specify '%s'", encryptfmt); 1095 ret = -EINVAL; 1096 goto fail; 1097 } 1098 qdict_put_str(encryptopts, "format", "luks"); 1099 r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); 1100 break; 1101 1102 default: 1103 error_setg(errp, "Unsupported encryption method %d", 1104 s->crypt_method_header); 1105 break; 1106 } 1107 if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) { 1108 ret = -EINVAL; 1109 goto fail; 1110 } 1111 1112 ret = 0; 1113 fail: 1114 qobject_unref(encryptopts); 1115 qemu_opts_del(opts); 1116 opts = NULL; 1117 return ret; 1118 } 1119 1120 static void qcow2_update_options_commit(BlockDriverState *bs, 1121 Qcow2ReopenState *r) 1122 { 1123 BDRVQcow2State *s = bs->opaque; 1124 int i; 1125 1126 if (s->l2_table_cache) { 1127 qcow2_cache_destroy(s->l2_table_cache); 1128 } 1129 if (s->refcount_block_cache) { 1130 qcow2_cache_destroy(s->refcount_block_cache); 1131 } 1132 s->l2_table_cache = r->l2_table_cache; 1133 s->refcount_block_cache = r->refcount_block_cache; 1134 s->l2_slice_size = r->l2_slice_size; 1135 1136 s->overlap_check = r->overlap_check; 1137 s->use_lazy_refcounts = r->use_lazy_refcounts; 1138 1139 for (i = 0; i < QCOW2_DISCARD_MAX; i++) { 1140 s->discard_passthrough[i] = r->discard_passthrough[i]; 1141 } 1142 1143 if (s->cache_clean_interval != r->cache_clean_interval) { 1144 cache_clean_timer_del(bs); 1145 s->cache_clean_interval = r->cache_clean_interval; 1146 cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); 1147 } 1148 1149 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); 1150 s->crypto_opts = r->crypto_opts; 1151 } 1152 1153 static void qcow2_update_options_abort(BlockDriverState *bs, 1154 Qcow2ReopenState *r) 1155 { 1156 if (r->l2_table_cache) { 1157 
qcow2_cache_destroy(r->l2_table_cache); 1158 } 1159 if (r->refcount_block_cache) { 1160 qcow2_cache_destroy(r->refcount_block_cache); 1161 } 1162 qapi_free_QCryptoBlockOpenOptions(r->crypto_opts); 1163 } 1164 1165 static int qcow2_update_options(BlockDriverState *bs, QDict *options, 1166 int flags, Error **errp) 1167 { 1168 Qcow2ReopenState r = {}; 1169 int ret; 1170 1171 ret = qcow2_update_options_prepare(bs, &r, options, flags, errp); 1172 if (ret >= 0) { 1173 qcow2_update_options_commit(bs, &r); 1174 } else { 1175 qcow2_update_options_abort(bs, &r); 1176 } 1177 1178 return ret; 1179 } 1180 1181 /* Called with s->lock held. */ 1182 static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, 1183 int flags, Error **errp) 1184 { 1185 BDRVQcow2State *s = bs->opaque; 1186 unsigned int len, i; 1187 int ret = 0; 1188 QCowHeader header; 1189 Error *local_err = NULL; 1190 uint64_t ext_end; 1191 uint64_t l1_vm_state_index; 1192 bool update_header = false; 1193 1194 ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); 1195 if (ret < 0) { 1196 error_setg_errno(errp, -ret, "Could not read qcow2 header"); 1197 goto fail; 1198 } 1199 header.magic = be32_to_cpu(header.magic); 1200 header.version = be32_to_cpu(header.version); 1201 header.backing_file_offset = be64_to_cpu(header.backing_file_offset); 1202 header.backing_file_size = be32_to_cpu(header.backing_file_size); 1203 header.size = be64_to_cpu(header.size); 1204 header.cluster_bits = be32_to_cpu(header.cluster_bits); 1205 header.crypt_method = be32_to_cpu(header.crypt_method); 1206 header.l1_table_offset = be64_to_cpu(header.l1_table_offset); 1207 header.l1_size = be32_to_cpu(header.l1_size); 1208 header.refcount_table_offset = be64_to_cpu(header.refcount_table_offset); 1209 header.refcount_table_clusters = 1210 be32_to_cpu(header.refcount_table_clusters); 1211 header.snapshots_offset = be64_to_cpu(header.snapshots_offset); 1212 header.nb_snapshots = be32_to_cpu(header.nb_snapshots); 1213 1214 if 
(header.magic != QCOW_MAGIC) { 1215 error_setg(errp, "Image is not in qcow2 format"); 1216 ret = -EINVAL; 1217 goto fail; 1218 } 1219 if (header.version < 2 || header.version > 3) { 1220 error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version); 1221 ret = -ENOTSUP; 1222 goto fail; 1223 } 1224 1225 s->qcow_version = header.version; 1226 1227 /* Initialise cluster size */ 1228 if (header.cluster_bits < MIN_CLUSTER_BITS || 1229 header.cluster_bits > MAX_CLUSTER_BITS) { 1230 error_setg(errp, "Unsupported cluster size: 2^%" PRIu32, 1231 header.cluster_bits); 1232 ret = -EINVAL; 1233 goto fail; 1234 } 1235 1236 s->cluster_bits = header.cluster_bits; 1237 s->cluster_size = 1 << s->cluster_bits; 1238 s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS); 1239 1240 /* Initialise version 3 header fields */ 1241 if (header.version == 2) { 1242 header.incompatible_features = 0; 1243 header.compatible_features = 0; 1244 header.autoclear_features = 0; 1245 header.refcount_order = 4; 1246 header.header_length = 72; 1247 } else { 1248 header.incompatible_features = 1249 be64_to_cpu(header.incompatible_features); 1250 header.compatible_features = be64_to_cpu(header.compatible_features); 1251 header.autoclear_features = be64_to_cpu(header.autoclear_features); 1252 header.refcount_order = be32_to_cpu(header.refcount_order); 1253 header.header_length = be32_to_cpu(header.header_length); 1254 1255 if (header.header_length < 104) { 1256 error_setg(errp, "qcow2 header too short"); 1257 ret = -EINVAL; 1258 goto fail; 1259 } 1260 } 1261 1262 if (header.header_length > s->cluster_size) { 1263 error_setg(errp, "qcow2 header exceeds cluster size"); 1264 ret = -EINVAL; 1265 goto fail; 1266 } 1267 1268 if (header.header_length > sizeof(header)) { 1269 s->unknown_header_fields_size = header.header_length - sizeof(header); 1270 s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); 1271 ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields, 1272 
s->unknown_header_fields_size); 1273 if (ret < 0) { 1274 error_setg_errno(errp, -ret, "Could not read unknown qcow2 header " 1275 "fields"); 1276 goto fail; 1277 } 1278 } 1279 1280 if (header.backing_file_offset > s->cluster_size) { 1281 error_setg(errp, "Invalid backing file offset"); 1282 ret = -EINVAL; 1283 goto fail; 1284 } 1285 1286 if (header.backing_file_offset) { 1287 ext_end = header.backing_file_offset; 1288 } else { 1289 ext_end = 1 << header.cluster_bits; 1290 } 1291 1292 /* Handle feature bits */ 1293 s->incompatible_features = header.incompatible_features; 1294 s->compatible_features = header.compatible_features; 1295 s->autoclear_features = header.autoclear_features; 1296 1297 if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) { 1298 void *feature_table = NULL; 1299 qcow2_read_extensions(bs, header.header_length, ext_end, 1300 &feature_table, flags, NULL, NULL); 1301 report_unsupported_feature(errp, feature_table, 1302 s->incompatible_features & 1303 ~QCOW2_INCOMPAT_MASK); 1304 ret = -ENOTSUP; 1305 g_free(feature_table); 1306 goto fail; 1307 } 1308 1309 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 1310 /* Corrupt images may not be written to unless they are being repaired 1311 */ 1312 if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { 1313 error_setg(errp, "qcow2: Image is corrupt; cannot be opened " 1314 "read/write"); 1315 ret = -EACCES; 1316 goto fail; 1317 } 1318 } 1319 1320 /* Check support for various header values */ 1321 if (header.refcount_order > 6) { 1322 error_setg(errp, "Reference count entry width too large; may not " 1323 "exceed 64 bits"); 1324 ret = -EINVAL; 1325 goto fail; 1326 } 1327 s->refcount_order = header.refcount_order; 1328 s->refcount_bits = 1 << s->refcount_order; 1329 s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); 1330 s->refcount_max += s->refcount_max - 1; 1331 1332 s->crypt_method_header = header.crypt_method; 1333 if (s->crypt_method_header) { 1334 if (bdrv_uses_whitelist() && 1335 
s->crypt_method_header == QCOW_CRYPT_AES) { 1336 error_setg(errp, 1337 "Use of AES-CBC encrypted qcow2 images is no longer " 1338 "supported in system emulators"); 1339 error_append_hint(errp, 1340 "You can use 'qemu-img convert' to convert your " 1341 "image to an alternative supported format, such " 1342 "as unencrypted qcow2, or raw with the LUKS " 1343 "format instead.\n"); 1344 ret = -ENOSYS; 1345 goto fail; 1346 } 1347 1348 if (s->crypt_method_header == QCOW_CRYPT_AES) { 1349 s->crypt_physical_offset = false; 1350 } else { 1351 /* Assuming LUKS and any future crypt methods we 1352 * add will all use physical offsets, due to the 1353 * fact that the alternative is insecure... */ 1354 s->crypt_physical_offset = true; 1355 } 1356 1357 bs->encrypted = true; 1358 } 1359 1360 s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ 1361 s->l2_size = 1 << s->l2_bits; 1362 /* 2^(s->refcount_order - 3) is the refcount width in bytes */ 1363 s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3); 1364 s->refcount_block_size = 1 << s->refcount_block_bits; 1365 bs->total_sectors = header.size / BDRV_SECTOR_SIZE; 1366 s->csize_shift = (62 - (s->cluster_bits - 8)); 1367 s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; 1368 s->cluster_offset_mask = (1LL << s->csize_shift) - 1; 1369 1370 s->refcount_table_offset = header.refcount_table_offset; 1371 s->refcount_table_size = 1372 header.refcount_table_clusters << (s->cluster_bits - 3); 1373 1374 if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) { 1375 error_setg(errp, "Image does not contain a reference count table"); 1376 ret = -EINVAL; 1377 goto fail; 1378 } 1379 1380 ret = qcow2_validate_table(bs, s->refcount_table_offset, 1381 header.refcount_table_clusters, 1382 s->cluster_size, QCOW_MAX_REFTABLE_SIZE, 1383 "Reference count table", errp); 1384 if (ret < 0) { 1385 goto fail; 1386 } 1387 1388 /* The total size in bytes of the snapshot table is checked in 1389 * qcow2_read_snapshots() 
because the size of each snapshot is 1390 * variable and we don't know it yet. 1391 * Here we only check the offset and number of snapshots. */ 1392 ret = qcow2_validate_table(bs, header.snapshots_offset, 1393 header.nb_snapshots, 1394 sizeof(QCowSnapshotHeader), 1395 sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS, 1396 "Snapshot table", errp); 1397 if (ret < 0) { 1398 goto fail; 1399 } 1400 1401 /* read the level 1 table */ 1402 ret = qcow2_validate_table(bs, header.l1_table_offset, 1403 header.l1_size, sizeof(uint64_t), 1404 QCOW_MAX_L1_SIZE, "Active L1 table", errp); 1405 if (ret < 0) { 1406 goto fail; 1407 } 1408 s->l1_size = header.l1_size; 1409 s->l1_table_offset = header.l1_table_offset; 1410 1411 l1_vm_state_index = size_to_l1(s, header.size); 1412 if (l1_vm_state_index > INT_MAX) { 1413 error_setg(errp, "Image is too big"); 1414 ret = -EFBIG; 1415 goto fail; 1416 } 1417 s->l1_vm_state_index = l1_vm_state_index; 1418 1419 /* the L1 table must contain at least enough entries to put 1420 header.size bytes */ 1421 if (s->l1_size < s->l1_vm_state_index) { 1422 error_setg(errp, "L1 table is too small"); 1423 ret = -EINVAL; 1424 goto fail; 1425 } 1426 1427 if (s->l1_size > 0) { 1428 s->l1_table = qemu_try_blockalign(bs->file->bs, 1429 ROUND_UP(s->l1_size * sizeof(uint64_t), 512)); 1430 if (s->l1_table == NULL) { 1431 error_setg(errp, "Could not allocate L1 table"); 1432 ret = -ENOMEM; 1433 goto fail; 1434 } 1435 ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, 1436 s->l1_size * sizeof(uint64_t)); 1437 if (ret < 0) { 1438 error_setg_errno(errp, -ret, "Could not read L1 table"); 1439 goto fail; 1440 } 1441 for(i = 0;i < s->l1_size; i++) { 1442 s->l1_table[i] = be64_to_cpu(s->l1_table[i]); 1443 } 1444 } 1445 1446 /* Parse driver-specific options */ 1447 ret = qcow2_update_options(bs, options, flags, errp); 1448 if (ret < 0) { 1449 goto fail; 1450 } 1451 1452 s->flags = flags; 1453 1454 ret = qcow2_refcount_init(bs); 1455 if (ret != 0) { 1456 
error_setg_errno(errp, -ret, "Could not initialize refcount handling"); 1457 goto fail; 1458 } 1459 1460 QLIST_INIT(&s->cluster_allocs); 1461 QTAILQ_INIT(&s->discards); 1462 1463 /* read qcow2 extensions */ 1464 if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL, 1465 flags, &update_header, &local_err)) { 1466 error_propagate(errp, local_err); 1467 ret = -EINVAL; 1468 goto fail; 1469 } 1470 1471 /* Open external data file */ 1472 s->data_file = bdrv_open_child(NULL, options, "data-file", bs, &child_file, 1473 true, &local_err); 1474 if (local_err) { 1475 error_propagate(errp, local_err); 1476 ret = -EINVAL; 1477 goto fail; 1478 } 1479 1480 if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) { 1481 if (!s->data_file && s->image_data_file) { 1482 s->data_file = bdrv_open_child(s->image_data_file, options, 1483 "data-file", bs, &child_file, 1484 false, errp); 1485 if (!s->data_file) { 1486 ret = -EINVAL; 1487 goto fail; 1488 } 1489 } 1490 if (!s->data_file) { 1491 error_setg(errp, "'data-file' is required for this image"); 1492 ret = -EINVAL; 1493 goto fail; 1494 } 1495 } else { 1496 if (s->data_file) { 1497 error_setg(errp, "'data-file' can only be set for images with an " 1498 "external data file"); 1499 ret = -EINVAL; 1500 goto fail; 1501 } 1502 1503 s->data_file = bs->file; 1504 1505 if (data_file_is_raw(bs)) { 1506 error_setg(errp, "data-file-raw requires a data file"); 1507 ret = -EINVAL; 1508 goto fail; 1509 } 1510 } 1511 1512 /* qcow2_read_extension may have set up the crypto context 1513 * if the crypt method needs a header region, some methods 1514 * don't need header extensions, so must check here 1515 */ 1516 if (s->crypt_method_header && !s->crypto) { 1517 if (s->crypt_method_header == QCOW_CRYPT_AES) { 1518 unsigned int cflags = 0; 1519 if (flags & BDRV_O_NO_IO) { 1520 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; 1521 } 1522 s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", 1523 NULL, NULL, cflags, 1, errp); 1524 if (!s->crypto) { 
1525 ret = -EINVAL; 1526 goto fail; 1527 } 1528 } else if (!(flags & BDRV_O_NO_IO)) { 1529 error_setg(errp, "Missing CRYPTO header for crypt method %d", 1530 s->crypt_method_header); 1531 ret = -EINVAL; 1532 goto fail; 1533 } 1534 } 1535 1536 /* read the backing file name */ 1537 if (header.backing_file_offset != 0) { 1538 len = header.backing_file_size; 1539 if (len > MIN(1023, s->cluster_size - header.backing_file_offset) || 1540 len >= sizeof(bs->backing_file)) { 1541 error_setg(errp, "Backing file name too long"); 1542 ret = -EINVAL; 1543 goto fail; 1544 } 1545 ret = bdrv_pread(bs->file, header.backing_file_offset, 1546 bs->auto_backing_file, len); 1547 if (ret < 0) { 1548 error_setg_errno(errp, -ret, "Could not read backing file name"); 1549 goto fail; 1550 } 1551 bs->auto_backing_file[len] = '\0'; 1552 pstrcpy(bs->backing_file, sizeof(bs->backing_file), 1553 bs->auto_backing_file); 1554 s->image_backing_file = g_strdup(bs->auto_backing_file); 1555 } 1556 1557 /* Internal snapshots */ 1558 s->snapshots_offset = header.snapshots_offset; 1559 s->nb_snapshots = header.nb_snapshots; 1560 1561 ret = qcow2_read_snapshots(bs); 1562 if (ret < 0) { 1563 error_setg_errno(errp, -ret, "Could not read snapshots"); 1564 goto fail; 1565 } 1566 1567 /* Clear unknown autoclear feature bits */ 1568 update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; 1569 update_header = 1570 update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE); 1571 if (update_header) { 1572 s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; 1573 } 1574 1575 /* == Handle persistent dirty bitmaps == 1576 * 1577 * We want load dirty bitmaps in three cases: 1578 * 1579 * 1. Normal open of the disk in active mode, not related to invalidation 1580 * after migration. 1581 * 1582 * 2. Invalidation of the target vm after pre-copy phase of migration, if 1583 * bitmaps are _not_ migrating through migration channel, i.e. 1584 * 'dirty-bitmaps' capability is disabled. 1585 * 1586 * 3. 
Invalidation of source vm after failed or canceled migration. 1587 * This is a very interesting case. There are two possible types of 1588 * bitmaps: 1589 * 1590 * A. Stored on inactivation and removed. They should be loaded from the 1591 * image. 1592 * 1593 * B. Not stored: not-persistent bitmaps and bitmaps, migrated through 1594 * the migration channel (with dirty-bitmaps capability). 1595 * 1596 * On the other hand, there are two possible sub-cases: 1597 * 1598 * 3.1 disk was changed by somebody else while were inactive. In this 1599 * case all in-RAM dirty bitmaps (both persistent and not) are 1600 * definitely invalid. And we don't have any method to determine 1601 * this. 1602 * 1603 * Simple and safe thing is to just drop all the bitmaps of type B on 1604 * inactivation. But in this case we lose bitmaps in valid 4.2 case. 1605 * 1606 * On the other hand, resuming source vm, if disk was already changed 1607 * is a bad thing anyway: not only bitmaps, the whole vm state is 1608 * out of sync with disk. 1609 * 1610 * This means, that user or management tool, who for some reason 1611 * decided to resume source vm, after disk was already changed by 1612 * target vm, should at least drop all dirty bitmaps by hand. 1613 * 1614 * So, we can ignore this case for now, but TODO: "generation" 1615 * extension for qcow2, to determine, that image was changed after 1616 * last inactivation. And if it is changed, we will drop (or at least 1617 * mark as 'invalid' all the bitmaps of type B, both persistent 1618 * and not). 1619 * 1620 * 3.2 disk was _not_ changed while were inactive. Bitmaps may be saved 1621 * to disk ('dirty-bitmaps' capability disabled), or not saved 1622 * ('dirty-bitmaps' capability enabled), but we don't need to care 1623 * of: let's load bitmaps as always: stored bitmaps will be loaded, 1624 * and not stored has flag IN_USE=1 in the image and will be skipped 1625 * on loading. 
1626 * 1627 * One remaining possible case when we don't want load bitmaps: 1628 * 1629 * 4. Open disk in inactive mode in target vm (bitmaps are migrating or 1630 * will be loaded on invalidation, no needs try loading them before) 1631 */ 1632 1633 if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) { 1634 /* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. */ 1635 bool header_updated = qcow2_load_dirty_bitmaps(bs, &local_err); 1636 1637 update_header = update_header && !header_updated; 1638 } 1639 if (local_err != NULL) { 1640 error_propagate(errp, local_err); 1641 ret = -EINVAL; 1642 goto fail; 1643 } 1644 1645 if (update_header) { 1646 ret = qcow2_update_header(bs); 1647 if (ret < 0) { 1648 error_setg_errno(errp, -ret, "Could not update qcow2 header"); 1649 goto fail; 1650 } 1651 } 1652 1653 bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP : 0; 1654 1655 /* Repair image if dirty */ 1656 if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && 1657 (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { 1658 BdrvCheckResult result = {0}; 1659 1660 ret = qcow2_co_check_locked(bs, &result, 1661 BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); 1662 if (ret < 0 || result.check_errors) { 1663 if (ret >= 0) { 1664 ret = -EIO; 1665 } 1666 error_setg_errno(errp, -ret, "Could not repair dirty image"); 1667 goto fail; 1668 } 1669 } 1670 1671 #ifdef DEBUG_ALLOC 1672 { 1673 BdrvCheckResult result = {0}; 1674 qcow2_check_refcounts(bs, &result, 0); 1675 } 1676 #endif 1677 1678 qemu_co_queue_init(&s->compress_wait_queue); 1679 1680 return ret; 1681 1682 fail: 1683 g_free(s->image_data_file); 1684 if (has_data_file(bs)) { 1685 bdrv_unref_child(bs, s->data_file); 1686 } 1687 g_free(s->unknown_header_fields); 1688 cleanup_unknown_header_ext(bs); 1689 qcow2_free_snapshots(bs); 1690 qcow2_refcount_close(bs); 1691 qemu_vfree(s->l1_table); 1692 /* else pre-write overlap checks in cache_destroy may crash */ 1693 s->l1_table = NULL; 1694 cache_clean_timer_del(bs); 
1695 if (s->l2_table_cache) { 1696 qcow2_cache_destroy(s->l2_table_cache); 1697 } 1698 if (s->refcount_block_cache) { 1699 qcow2_cache_destroy(s->refcount_block_cache); 1700 } 1701 qcrypto_block_free(s->crypto); 1702 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); 1703 return ret; 1704 } 1705 1706 typedef struct QCow2OpenCo { 1707 BlockDriverState *bs; 1708 QDict *options; 1709 int flags; 1710 Error **errp; 1711 int ret; 1712 } QCow2OpenCo; 1713 1714 static void coroutine_fn qcow2_open_entry(void *opaque) 1715 { 1716 QCow2OpenCo *qoc = opaque; 1717 BDRVQcow2State *s = qoc->bs->opaque; 1718 1719 qemu_co_mutex_lock(&s->lock); 1720 qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp); 1721 qemu_co_mutex_unlock(&s->lock); 1722 } 1723 1724 static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, 1725 Error **errp) 1726 { 1727 BDRVQcow2State *s = bs->opaque; 1728 QCow2OpenCo qoc = { 1729 .bs = bs, 1730 .options = options, 1731 .flags = flags, 1732 .errp = errp, 1733 .ret = -EINPROGRESS 1734 }; 1735 1736 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 1737 false, errp); 1738 if (!bs->file) { 1739 return -EINVAL; 1740 } 1741 1742 /* Initialise locks */ 1743 qemu_co_mutex_init(&s->lock); 1744 1745 if (qemu_in_coroutine()) { 1746 /* From bdrv_co_create. 
*/ 1747 qcow2_open_entry(&qoc); 1748 } else { 1749 assert(qemu_get_current_aio_context() == qemu_get_aio_context()); 1750 qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc)); 1751 BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); 1752 } 1753 return qoc.ret; 1754 } 1755 1756 static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) 1757 { 1758 BDRVQcow2State *s = bs->opaque; 1759 1760 if (bs->encrypted) { 1761 /* Encryption works on a sector granularity */ 1762 bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto); 1763 } 1764 bs->bl.pwrite_zeroes_alignment = s->cluster_size; 1765 bs->bl.pdiscard_alignment = s->cluster_size; 1766 } 1767 1768 static int qcow2_reopen_prepare(BDRVReopenState *state, 1769 BlockReopenQueue *queue, Error **errp) 1770 { 1771 Qcow2ReopenState *r; 1772 int ret; 1773 1774 r = g_new0(Qcow2ReopenState, 1); 1775 state->opaque = r; 1776 1777 ret = qcow2_update_options_prepare(state->bs, r, state->options, 1778 state->flags, errp); 1779 if (ret < 0) { 1780 goto fail; 1781 } 1782 1783 /* We need to write out any unwritten data if we reopen read-only. 
*/ 1784 if ((state->flags & BDRV_O_RDWR) == 0) { 1785 ret = qcow2_reopen_bitmaps_ro(state->bs, errp); 1786 if (ret < 0) { 1787 goto fail; 1788 } 1789 1790 ret = bdrv_flush(state->bs); 1791 if (ret < 0) { 1792 goto fail; 1793 } 1794 1795 ret = qcow2_mark_clean(state->bs); 1796 if (ret < 0) { 1797 goto fail; 1798 } 1799 } 1800 1801 return 0; 1802 1803 fail: 1804 qcow2_update_options_abort(state->bs, r); 1805 g_free(r); 1806 return ret; 1807 } 1808 1809 static void qcow2_reopen_commit(BDRVReopenState *state) 1810 { 1811 qcow2_update_options_commit(state->bs, state->opaque); 1812 g_free(state->opaque); 1813 } 1814 1815 static void qcow2_reopen_abort(BDRVReopenState *state) 1816 { 1817 qcow2_update_options_abort(state->bs, state->opaque); 1818 g_free(state->opaque); 1819 } 1820 1821 static void qcow2_join_options(QDict *options, QDict *old_options) 1822 { 1823 bool has_new_overlap_template = 1824 qdict_haskey(options, QCOW2_OPT_OVERLAP) || 1825 qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE); 1826 bool has_new_total_cache_size = 1827 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE); 1828 bool has_all_cache_options; 1829 1830 /* New overlap template overrides all old overlap options */ 1831 if (has_new_overlap_template) { 1832 qdict_del(old_options, QCOW2_OPT_OVERLAP); 1833 qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE); 1834 qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER); 1835 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1); 1836 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2); 1837 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE); 1838 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK); 1839 qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE); 1840 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1); 1841 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2); 1842 } 1843 1844 /* New total cache size overrides all old options */ 1845 if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) { 1846 qdict_del(old_options, 
QCOW2_OPT_L2_CACHE_SIZE); 1847 qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 1848 } 1849 1850 qdict_join(options, old_options, false); 1851 1852 /* 1853 * If after merging all cache size options are set, an old total size is 1854 * overwritten. Do keep all options, however, if all three are new. The 1855 * resulting error message is what we want to happen. 1856 */ 1857 has_all_cache_options = 1858 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) || 1859 qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) || 1860 qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 1861 1862 if (has_all_cache_options && !has_new_total_cache_size) { 1863 qdict_del(options, QCOW2_OPT_CACHE_SIZE); 1864 } 1865 } 1866 1867 static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, 1868 bool want_zero, 1869 int64_t offset, int64_t count, 1870 int64_t *pnum, int64_t *map, 1871 BlockDriverState **file) 1872 { 1873 BDRVQcow2State *s = bs->opaque; 1874 uint64_t cluster_offset; 1875 int index_in_cluster, ret; 1876 unsigned int bytes; 1877 int status = 0; 1878 1879 bytes = MIN(INT_MAX, count); 1880 qemu_co_mutex_lock(&s->lock); 1881 ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); 1882 qemu_co_mutex_unlock(&s->lock); 1883 if (ret < 0) { 1884 return ret; 1885 } 1886 1887 *pnum = bytes; 1888 1889 if ((ret == QCOW2_CLUSTER_NORMAL || ret == QCOW2_CLUSTER_ZERO_ALLOC) && 1890 !s->crypto) { 1891 index_in_cluster = offset & (s->cluster_size - 1); 1892 *map = cluster_offset | index_in_cluster; 1893 *file = s->data_file->bs; 1894 status |= BDRV_BLOCK_OFFSET_VALID; 1895 } 1896 if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) { 1897 status |= BDRV_BLOCK_ZERO; 1898 } else if (ret != QCOW2_CLUSTER_UNALLOCATED) { 1899 status |= BDRV_BLOCK_DATA; 1900 } 1901 return status; 1902 } 1903 1904 static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, 1905 QCowL2Meta **pl2meta, 1906 bool link_l2) 1907 { 1908 int ret = 0; 1909 QCowL2Meta *l2meta = *pl2meta; 
1910 1911 while (l2meta != NULL) { 1912 QCowL2Meta *next; 1913 1914 if (link_l2) { 1915 ret = qcow2_alloc_cluster_link_l2(bs, l2meta); 1916 if (ret) { 1917 goto out; 1918 } 1919 } else { 1920 qcow2_alloc_cluster_abort(bs, l2meta); 1921 } 1922 1923 /* Take the request off the list of running requests */ 1924 if (l2meta->nb_clusters != 0) { 1925 QLIST_REMOVE(l2meta, next_in_flight); 1926 } 1927 1928 qemu_co_queue_restart_all(&l2meta->dependent_requests); 1929 1930 next = l2meta->next; 1931 g_free(l2meta); 1932 l2meta = next; 1933 } 1934 out: 1935 *pl2meta = l2meta; 1936 return ret; 1937 } 1938 1939 static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, 1940 uint64_t bytes, QEMUIOVector *qiov, 1941 int flags) 1942 { 1943 BDRVQcow2State *s = bs->opaque; 1944 int offset_in_cluster; 1945 int ret; 1946 unsigned int cur_bytes; /* number of bytes in current iteration */ 1947 uint64_t cluster_offset = 0; 1948 uint64_t bytes_done = 0; 1949 QEMUIOVector hd_qiov; 1950 uint8_t *cluster_data = NULL; 1951 1952 qemu_iovec_init(&hd_qiov, qiov->niov); 1953 1954 qemu_co_mutex_lock(&s->lock); 1955 1956 while (bytes != 0) { 1957 1958 /* prepare next request */ 1959 cur_bytes = MIN(bytes, INT_MAX); 1960 if (s->crypto) { 1961 cur_bytes = MIN(cur_bytes, 1962 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 1963 } 1964 1965 ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset); 1966 if (ret < 0) { 1967 goto fail; 1968 } 1969 1970 offset_in_cluster = offset_into_cluster(s, offset); 1971 1972 qemu_iovec_reset(&hd_qiov); 1973 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); 1974 1975 switch (ret) { 1976 case QCOW2_CLUSTER_UNALLOCATED: 1977 1978 if (bs->backing) { 1979 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); 1980 qemu_co_mutex_unlock(&s->lock); 1981 ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, 1982 &hd_qiov, 0); 1983 qemu_co_mutex_lock(&s->lock); 1984 if (ret < 0) { 1985 goto fail; 1986 } 1987 } else { 1988 /* Note: in this case, no 
need to wait */ 1989 qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); 1990 } 1991 break; 1992 1993 case QCOW2_CLUSTER_ZERO_PLAIN: 1994 case QCOW2_CLUSTER_ZERO_ALLOC: 1995 qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); 1996 break; 1997 1998 case QCOW2_CLUSTER_COMPRESSED: 1999 qemu_co_mutex_unlock(&s->lock); 2000 ret = qcow2_co_preadv_compressed(bs, cluster_offset, 2001 offset, cur_bytes, 2002 &hd_qiov); 2003 qemu_co_mutex_lock(&s->lock); 2004 if (ret < 0) { 2005 goto fail; 2006 } 2007 2008 break; 2009 2010 case QCOW2_CLUSTER_NORMAL: 2011 if ((cluster_offset & 511) != 0) { 2012 ret = -EIO; 2013 goto fail; 2014 } 2015 2016 if (bs->encrypted) { 2017 assert(s->crypto); 2018 2019 /* 2020 * For encrypted images, read everything into a temporary 2021 * contiguous buffer on which the AES functions can work. 2022 */ 2023 if (!cluster_data) { 2024 cluster_data = 2025 qemu_try_blockalign(s->data_file->bs, 2026 QCOW_MAX_CRYPT_CLUSTERS 2027 * s->cluster_size); 2028 if (cluster_data == NULL) { 2029 ret = -ENOMEM; 2030 goto fail; 2031 } 2032 } 2033 2034 assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 2035 qemu_iovec_reset(&hd_qiov); 2036 qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); 2037 } 2038 2039 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); 2040 qemu_co_mutex_unlock(&s->lock); 2041 ret = bdrv_co_preadv(s->data_file, 2042 cluster_offset + offset_in_cluster, 2043 cur_bytes, &hd_qiov, 0); 2044 qemu_co_mutex_lock(&s->lock); 2045 if (ret < 0) { 2046 goto fail; 2047 } 2048 if (bs->encrypted) { 2049 assert(s->crypto); 2050 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); 2051 assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0); 2052 if (qcrypto_block_decrypt(s->crypto, 2053 (s->crypt_physical_offset ? 
2054 cluster_offset + offset_in_cluster : 2055 offset), 2056 cluster_data, 2057 cur_bytes, 2058 NULL) < 0) { 2059 ret = -EIO; 2060 goto fail; 2061 } 2062 qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes); 2063 } 2064 break; 2065 2066 default: 2067 g_assert_not_reached(); 2068 ret = -EIO; 2069 goto fail; 2070 } 2071 2072 bytes -= cur_bytes; 2073 offset += cur_bytes; 2074 bytes_done += cur_bytes; 2075 } 2076 ret = 0; 2077 2078 fail: 2079 qemu_co_mutex_unlock(&s->lock); 2080 2081 qemu_iovec_destroy(&hd_qiov); 2082 qemu_vfree(cluster_data); 2083 2084 return ret; 2085 } 2086 2087 /* Check if it's possible to merge a write request with the writing of 2088 * the data from the COW regions */ 2089 static bool merge_cow(uint64_t offset, unsigned bytes, 2090 QEMUIOVector *hd_qiov, QCowL2Meta *l2meta) 2091 { 2092 QCowL2Meta *m; 2093 2094 for (m = l2meta; m != NULL; m = m->next) { 2095 /* If both COW regions are empty then there's nothing to merge */ 2096 if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) { 2097 continue; 2098 } 2099 2100 /* The data (middle) region must be immediately after the 2101 * start region */ 2102 if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) { 2103 continue; 2104 } 2105 2106 /* The end region must be immediately after the data (middle) 2107 * region */ 2108 if (m->offset + m->cow_end.offset != offset + bytes) { 2109 continue; 2110 } 2111 2112 /* Make sure that adding both COW regions to the QEMUIOVector 2113 * does not exceed IOV_MAX */ 2114 if (hd_qiov->niov > IOV_MAX - 2) { 2115 continue; 2116 } 2117 2118 m->data_qiov = hd_qiov; 2119 return true; 2120 } 2121 2122 return false; 2123 } 2124 2125 static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, 2126 uint64_t bytes, QEMUIOVector *qiov, 2127 int flags) 2128 { 2129 BDRVQcow2State *s = bs->opaque; 2130 int offset_in_cluster; 2131 int ret; 2132 unsigned int cur_bytes; /* number of sectors in current iteration */ 2133 uint64_t cluster_offset; 
2134 QEMUIOVector hd_qiov; 2135 uint64_t bytes_done = 0; 2136 uint8_t *cluster_data = NULL; 2137 QCowL2Meta *l2meta = NULL; 2138 2139 trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes); 2140 2141 qemu_iovec_init(&hd_qiov, qiov->niov); 2142 2143 qemu_co_mutex_lock(&s->lock); 2144 2145 while (bytes != 0) { 2146 2147 l2meta = NULL; 2148 2149 trace_qcow2_writev_start_part(qemu_coroutine_self()); 2150 offset_in_cluster = offset_into_cluster(s, offset); 2151 cur_bytes = MIN(bytes, INT_MAX); 2152 if (bs->encrypted) { 2153 cur_bytes = MIN(cur_bytes, 2154 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size 2155 - offset_in_cluster); 2156 } 2157 2158 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, 2159 &cluster_offset, &l2meta); 2160 if (ret < 0) { 2161 goto fail; 2162 } 2163 2164 assert((cluster_offset & 511) == 0); 2165 2166 qemu_iovec_reset(&hd_qiov); 2167 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); 2168 2169 if (bs->encrypted) { 2170 assert(s->crypto); 2171 if (!cluster_data) { 2172 cluster_data = qemu_try_blockalign(bs->file->bs, 2173 QCOW_MAX_CRYPT_CLUSTERS 2174 * s->cluster_size); 2175 if (cluster_data == NULL) { 2176 ret = -ENOMEM; 2177 goto fail; 2178 } 2179 } 2180 2181 assert(hd_qiov.size <= 2182 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 2183 qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); 2184 2185 if (qcrypto_block_encrypt(s->crypto, 2186 (s->crypt_physical_offset ? 2187 cluster_offset + offset_in_cluster : 2188 offset), 2189 cluster_data, 2190 cur_bytes, NULL) < 0) { 2191 ret = -EIO; 2192 goto fail; 2193 } 2194 2195 qemu_iovec_reset(&hd_qiov); 2196 qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); 2197 } 2198 2199 ret = qcow2_pre_write_overlap_check(bs, 0, 2200 cluster_offset + offset_in_cluster, cur_bytes, true); 2201 if (ret < 0) { 2202 goto fail; 2203 } 2204 2205 /* If we need to do COW, check if it's possible to merge the 2206 * writing of the guest data together with that of the COW regions. 
2207 * If it's not possible (or not necessary) then write the 2208 * guest data now. */ 2209 if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) { 2210 qemu_co_mutex_unlock(&s->lock); 2211 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); 2212 trace_qcow2_writev_data(qemu_coroutine_self(), 2213 cluster_offset + offset_in_cluster); 2214 ret = bdrv_co_pwritev(s->data_file, 2215 cluster_offset + offset_in_cluster, 2216 cur_bytes, &hd_qiov, 0); 2217 qemu_co_mutex_lock(&s->lock); 2218 if (ret < 0) { 2219 goto fail; 2220 } 2221 } 2222 2223 ret = qcow2_handle_l2meta(bs, &l2meta, true); 2224 if (ret) { 2225 goto fail; 2226 } 2227 2228 bytes -= cur_bytes; 2229 offset += cur_bytes; 2230 bytes_done += cur_bytes; 2231 trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes); 2232 } 2233 ret = 0; 2234 2235 fail: 2236 qcow2_handle_l2meta(bs, &l2meta, false); 2237 2238 qemu_co_mutex_unlock(&s->lock); 2239 2240 qemu_iovec_destroy(&hd_qiov); 2241 qemu_vfree(cluster_data); 2242 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); 2243 2244 return ret; 2245 } 2246 2247 static int qcow2_inactivate(BlockDriverState *bs) 2248 { 2249 BDRVQcow2State *s = bs->opaque; 2250 int ret, result = 0; 2251 Error *local_err = NULL; 2252 2253 qcow2_store_persistent_dirty_bitmaps(bs, &local_err); 2254 if (local_err != NULL) { 2255 result = -EINVAL; 2256 error_reportf_err(local_err, "Lost persistent bitmaps during " 2257 "inactivation of node '%s': ", 2258 bdrv_get_device_or_node_name(bs)); 2259 } 2260 2261 ret = qcow2_cache_flush(bs, s->l2_table_cache); 2262 if (ret) { 2263 result = ret; 2264 error_report("Failed to flush the L2 table cache: %s", 2265 strerror(-ret)); 2266 } 2267 2268 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 2269 if (ret) { 2270 result = ret; 2271 error_report("Failed to flush the refcount block cache: %s", 2272 strerror(-ret)); 2273 } 2274 2275 if (result == 0) { 2276 qcow2_mark_clean(bs); 2277 } 2278 2279 return result; 2280 } 2281 2282 static void 
qcow2_close(BlockDriverState *bs) 2283 { 2284 BDRVQcow2State *s = bs->opaque; 2285 qemu_vfree(s->l1_table); 2286 /* else pre-write overlap checks in cache_destroy may crash */ 2287 s->l1_table = NULL; 2288 2289 if (!(s->flags & BDRV_O_INACTIVE)) { 2290 qcow2_inactivate(bs); 2291 } 2292 2293 cache_clean_timer_del(bs); 2294 qcow2_cache_destroy(s->l2_table_cache); 2295 qcow2_cache_destroy(s->refcount_block_cache); 2296 2297 qcrypto_block_free(s->crypto); 2298 s->crypto = NULL; 2299 2300 g_free(s->unknown_header_fields); 2301 cleanup_unknown_header_ext(bs); 2302 2303 g_free(s->image_data_file); 2304 g_free(s->image_backing_file); 2305 g_free(s->image_backing_format); 2306 2307 if (has_data_file(bs)) { 2308 bdrv_unref_child(bs, s->data_file); 2309 } 2310 2311 qcow2_refcount_close(bs); 2312 qcow2_free_snapshots(bs); 2313 } 2314 2315 static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs, 2316 Error **errp) 2317 { 2318 BDRVQcow2State *s = bs->opaque; 2319 int flags = s->flags; 2320 QCryptoBlock *crypto = NULL; 2321 QDict *options; 2322 Error *local_err = NULL; 2323 int ret; 2324 2325 /* 2326 * Backing files are read-only which makes all of their metadata immutable, 2327 * that means we don't have to worry about reopening them here. 
2328 */ 2329 2330 crypto = s->crypto; 2331 s->crypto = NULL; 2332 2333 qcow2_close(bs); 2334 2335 memset(s, 0, sizeof(BDRVQcow2State)); 2336 options = qdict_clone_shallow(bs->options); 2337 2338 flags &= ~BDRV_O_INACTIVE; 2339 qemu_co_mutex_lock(&s->lock); 2340 ret = qcow2_do_open(bs, options, flags, &local_err); 2341 qemu_co_mutex_unlock(&s->lock); 2342 qobject_unref(options); 2343 if (local_err) { 2344 error_propagate_prepend(errp, local_err, 2345 "Could not reopen qcow2 layer: "); 2346 bs->drv = NULL; 2347 return; 2348 } else if (ret < 0) { 2349 error_setg_errno(errp, -ret, "Could not reopen qcow2 layer"); 2350 bs->drv = NULL; 2351 return; 2352 } 2353 2354 s->crypto = crypto; 2355 } 2356 2357 static size_t header_ext_add(char *buf, uint32_t magic, const void *s, 2358 size_t len, size_t buflen) 2359 { 2360 QCowExtension *ext_backing_fmt = (QCowExtension*) buf; 2361 size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7); 2362 2363 if (buflen < ext_len) { 2364 return -ENOSPC; 2365 } 2366 2367 *ext_backing_fmt = (QCowExtension) { 2368 .magic = cpu_to_be32(magic), 2369 .len = cpu_to_be32(len), 2370 }; 2371 2372 if (len) { 2373 memcpy(buf + sizeof(QCowExtension), s, len); 2374 } 2375 2376 return ext_len; 2377 } 2378 2379 /* 2380 * Updates the qcow2 header, including the variable length parts of it, i.e. 2381 * the backing file name and all extensions. qcow2 was not designed to allow 2382 * such changes, so if we run out of space (we can only use the first cluster) 2383 * this function may fail. 2384 * 2385 * Returns 0 on success, -errno in error cases. 
2386 */ 2387 int qcow2_update_header(BlockDriverState *bs) 2388 { 2389 BDRVQcow2State *s = bs->opaque; 2390 QCowHeader *header; 2391 char *buf; 2392 size_t buflen = s->cluster_size; 2393 int ret; 2394 uint64_t total_size; 2395 uint32_t refcount_table_clusters; 2396 size_t header_length; 2397 Qcow2UnknownHeaderExtension *uext; 2398 2399 buf = qemu_blockalign(bs, buflen); 2400 2401 /* Header structure */ 2402 header = (QCowHeader*) buf; 2403 2404 if (buflen < sizeof(*header)) { 2405 ret = -ENOSPC; 2406 goto fail; 2407 } 2408 2409 header_length = sizeof(*header) + s->unknown_header_fields_size; 2410 total_size = bs->total_sectors * BDRV_SECTOR_SIZE; 2411 refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); 2412 2413 *header = (QCowHeader) { 2414 /* Version 2 fields */ 2415 .magic = cpu_to_be32(QCOW_MAGIC), 2416 .version = cpu_to_be32(s->qcow_version), 2417 .backing_file_offset = 0, 2418 .backing_file_size = 0, 2419 .cluster_bits = cpu_to_be32(s->cluster_bits), 2420 .size = cpu_to_be64(total_size), 2421 .crypt_method = cpu_to_be32(s->crypt_method_header), 2422 .l1_size = cpu_to_be32(s->l1_size), 2423 .l1_table_offset = cpu_to_be64(s->l1_table_offset), 2424 .refcount_table_offset = cpu_to_be64(s->refcount_table_offset), 2425 .refcount_table_clusters = cpu_to_be32(refcount_table_clusters), 2426 .nb_snapshots = cpu_to_be32(s->nb_snapshots), 2427 .snapshots_offset = cpu_to_be64(s->snapshots_offset), 2428 2429 /* Version 3 fields */ 2430 .incompatible_features = cpu_to_be64(s->incompatible_features), 2431 .compatible_features = cpu_to_be64(s->compatible_features), 2432 .autoclear_features = cpu_to_be64(s->autoclear_features), 2433 .refcount_order = cpu_to_be32(s->refcount_order), 2434 .header_length = cpu_to_be32(header_length), 2435 }; 2436 2437 /* For older versions, write a shorter header */ 2438 switch (s->qcow_version) { 2439 case 2: 2440 ret = offsetof(QCowHeader, incompatible_features); 2441 break; 2442 case 3: 2443 ret = sizeof(*header); 2444 
break; 2445 default: 2446 ret = -EINVAL; 2447 goto fail; 2448 } 2449 2450 buf += ret; 2451 buflen -= ret; 2452 memset(buf, 0, buflen); 2453 2454 /* Preserve any unknown field in the header */ 2455 if (s->unknown_header_fields_size) { 2456 if (buflen < s->unknown_header_fields_size) { 2457 ret = -ENOSPC; 2458 goto fail; 2459 } 2460 2461 memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size); 2462 buf += s->unknown_header_fields_size; 2463 buflen -= s->unknown_header_fields_size; 2464 } 2465 2466 /* Backing file format header extension */ 2467 if (s->image_backing_format) { 2468 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, 2469 s->image_backing_format, 2470 strlen(s->image_backing_format), 2471 buflen); 2472 if (ret < 0) { 2473 goto fail; 2474 } 2475 2476 buf += ret; 2477 buflen -= ret; 2478 } 2479 2480 /* External data file header extension */ 2481 if (has_data_file(bs) && s->image_data_file) { 2482 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DATA_FILE, 2483 s->image_data_file, strlen(s->image_data_file), 2484 buflen); 2485 if (ret < 0) { 2486 goto fail; 2487 } 2488 2489 buf += ret; 2490 buflen -= ret; 2491 } 2492 2493 /* Full disk encryption header pointer extension */ 2494 if (s->crypto_header.offset != 0) { 2495 s->crypto_header.offset = cpu_to_be64(s->crypto_header.offset); 2496 s->crypto_header.length = cpu_to_be64(s->crypto_header.length); 2497 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER, 2498 &s->crypto_header, sizeof(s->crypto_header), 2499 buflen); 2500 s->crypto_header.offset = be64_to_cpu(s->crypto_header.offset); 2501 s->crypto_header.length = be64_to_cpu(s->crypto_header.length); 2502 if (ret < 0) { 2503 goto fail; 2504 } 2505 buf += ret; 2506 buflen -= ret; 2507 } 2508 2509 /* Feature table */ 2510 if (s->qcow_version >= 3) { 2511 Qcow2Feature features[] = { 2512 { 2513 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 2514 .bit = QCOW2_INCOMPAT_DIRTY_BITNR, 2515 .name = "dirty bit", 2516 }, 2517 { 2518 .type = 
QCOW2_FEAT_TYPE_INCOMPATIBLE, 2519 .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, 2520 .name = "corrupt bit", 2521 }, 2522 { 2523 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 2524 .bit = QCOW2_INCOMPAT_DATA_FILE_BITNR, 2525 .name = "external data file", 2526 }, 2527 { 2528 .type = QCOW2_FEAT_TYPE_COMPATIBLE, 2529 .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, 2530 .name = "lazy refcounts", 2531 }, 2532 }; 2533 2534 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, 2535 features, sizeof(features), buflen); 2536 if (ret < 0) { 2537 goto fail; 2538 } 2539 buf += ret; 2540 buflen -= ret; 2541 } 2542 2543 /* Bitmap extension */ 2544 if (s->nb_bitmaps > 0) { 2545 Qcow2BitmapHeaderExt bitmaps_header = { 2546 .nb_bitmaps = cpu_to_be32(s->nb_bitmaps), 2547 .bitmap_directory_size = 2548 cpu_to_be64(s->bitmap_directory_size), 2549 .bitmap_directory_offset = 2550 cpu_to_be64(s->bitmap_directory_offset) 2551 }; 2552 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS, 2553 &bitmaps_header, sizeof(bitmaps_header), 2554 buflen); 2555 if (ret < 0) { 2556 goto fail; 2557 } 2558 buf += ret; 2559 buflen -= ret; 2560 } 2561 2562 /* Keep unknown header extensions */ 2563 QLIST_FOREACH(uext, &s->unknown_header_ext, next) { 2564 ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); 2565 if (ret < 0) { 2566 goto fail; 2567 } 2568 2569 buf += ret; 2570 buflen -= ret; 2571 } 2572 2573 /* End of header extensions */ 2574 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen); 2575 if (ret < 0) { 2576 goto fail; 2577 } 2578 2579 buf += ret; 2580 buflen -= ret; 2581 2582 /* Backing file name */ 2583 if (s->image_backing_file) { 2584 size_t backing_file_len = strlen(s->image_backing_file); 2585 2586 if (buflen < backing_file_len) { 2587 ret = -ENOSPC; 2588 goto fail; 2589 } 2590 2591 /* Using strncpy is ok here, since buf is not NUL-terminated. 
*/ 2592 strncpy(buf, s->image_backing_file, buflen); 2593 2594 header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); 2595 header->backing_file_size = cpu_to_be32(backing_file_len); 2596 } 2597 2598 /* Write the new header */ 2599 ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size); 2600 if (ret < 0) { 2601 goto fail; 2602 } 2603 2604 ret = 0; 2605 fail: 2606 qemu_vfree(header); 2607 return ret; 2608 } 2609 2610 static int qcow2_change_backing_file(BlockDriverState *bs, 2611 const char *backing_file, const char *backing_fmt) 2612 { 2613 BDRVQcow2State *s = bs->opaque; 2614 2615 /* Adding a backing file means that the external data file alone won't be 2616 * enough to make sense of the content */ 2617 if (backing_file && data_file_is_raw(bs)) { 2618 return -EINVAL; 2619 } 2620 2621 if (backing_file && strlen(backing_file) > 1023) { 2622 return -EINVAL; 2623 } 2624 2625 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file), 2626 backing_file ?: ""); 2627 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2628 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2629 2630 g_free(s->image_backing_file); 2631 g_free(s->image_backing_format); 2632 2633 s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL; 2634 s->image_backing_format = backing_fmt ? 
g_strdup(bs->backing_format) : NULL; 2635 2636 return qcow2_update_header(bs); 2637 } 2638 2639 static int qcow2_crypt_method_from_format(const char *encryptfmt) 2640 { 2641 if (g_str_equal(encryptfmt, "luks")) { 2642 return QCOW_CRYPT_LUKS; 2643 } else if (g_str_equal(encryptfmt, "aes")) { 2644 return QCOW_CRYPT_AES; 2645 } else { 2646 return -EINVAL; 2647 } 2648 } 2649 2650 static int qcow2_set_up_encryption(BlockDriverState *bs, 2651 QCryptoBlockCreateOptions *cryptoopts, 2652 Error **errp) 2653 { 2654 BDRVQcow2State *s = bs->opaque; 2655 QCryptoBlock *crypto = NULL; 2656 int fmt, ret; 2657 2658 switch (cryptoopts->format) { 2659 case Q_CRYPTO_BLOCK_FORMAT_LUKS: 2660 fmt = QCOW_CRYPT_LUKS; 2661 break; 2662 case Q_CRYPTO_BLOCK_FORMAT_QCOW: 2663 fmt = QCOW_CRYPT_AES; 2664 break; 2665 default: 2666 error_setg(errp, "Crypto format not supported in qcow2"); 2667 return -EINVAL; 2668 } 2669 2670 s->crypt_method_header = fmt; 2671 2672 crypto = qcrypto_block_create(cryptoopts, "encrypt.", 2673 qcow2_crypto_hdr_init_func, 2674 qcow2_crypto_hdr_write_func, 2675 bs, errp); 2676 if (!crypto) { 2677 return -EINVAL; 2678 } 2679 2680 ret = qcow2_update_header(bs); 2681 if (ret < 0) { 2682 error_setg_errno(errp, -ret, "Could not write encryption header"); 2683 goto out; 2684 } 2685 2686 ret = 0; 2687 out: 2688 qcrypto_block_free(crypto); 2689 return ret; 2690 } 2691 2692 /** 2693 * Preallocates metadata structures for data clusters between @offset (in the 2694 * guest disk) and @new_length (which is thus generally the new guest disk 2695 * size). 2696 * 2697 * Returns: 0 on success, -errno on failure. 
2698 */ 2699 static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, 2700 uint64_t new_length) 2701 { 2702 uint64_t bytes; 2703 uint64_t host_offset = 0; 2704 unsigned int cur_bytes; 2705 int ret; 2706 QCowL2Meta *meta; 2707 2708 assert(offset <= new_length); 2709 bytes = new_length - offset; 2710 2711 while (bytes) { 2712 cur_bytes = MIN(bytes, INT_MAX); 2713 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, 2714 &host_offset, &meta); 2715 if (ret < 0) { 2716 return ret; 2717 } 2718 2719 while (meta) { 2720 QCowL2Meta *next = meta->next; 2721 2722 ret = qcow2_alloc_cluster_link_l2(bs, meta); 2723 if (ret < 0) { 2724 qcow2_free_any_clusters(bs, meta->alloc_offset, 2725 meta->nb_clusters, QCOW2_DISCARD_NEVER); 2726 return ret; 2727 } 2728 2729 /* There are no dependent requests, but we need to remove our 2730 * request from the list of in-flight requests */ 2731 QLIST_REMOVE(meta, next_in_flight); 2732 2733 g_free(meta); 2734 meta = next; 2735 } 2736 2737 /* TODO Preallocate data if requested */ 2738 2739 bytes -= cur_bytes; 2740 offset += cur_bytes; 2741 } 2742 2743 /* 2744 * It is expected that the image file is large enough to actually contain 2745 * all of the allocated clusters (otherwise we get failing reads after 2746 * EOF). Extend the image to the last allocated sector. 2747 */ 2748 if (host_offset != 0) { 2749 uint8_t data = 0; 2750 ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1, 2751 &data, 1); 2752 if (ret < 0) { 2753 return ret; 2754 } 2755 } 2756 2757 return 0; 2758 } 2759 2760 /* qcow2_refcount_metadata_size: 2761 * @clusters: number of clusters to refcount (including data and L1/L2 tables) 2762 * @cluster_size: size of a cluster, in bytes 2763 * @refcount_order: refcount bits power-of-2 exponent 2764 * @generous_increase: allow for the refcount table to be 1.5x as large as it 2765 * needs to be 2766 * 2767 * Returns: Number of bytes required for refcount blocks and table metadata. 
2768 */ 2769 int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, 2770 int refcount_order, bool generous_increase, 2771 uint64_t *refblock_count) 2772 { 2773 /* 2774 * Every host cluster is reference-counted, including metadata (even 2775 * refcount metadata is recursively included). 2776 * 2777 * An accurate formula for the size of refcount metadata size is difficult 2778 * to derive. An easier method of calculation is finding the fixed point 2779 * where no further refcount blocks or table clusters are required to 2780 * reference count every cluster. 2781 */ 2782 int64_t blocks_per_table_cluster = cluster_size / sizeof(uint64_t); 2783 int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order); 2784 int64_t table = 0; /* number of refcount table clusters */ 2785 int64_t blocks = 0; /* number of refcount block clusters */ 2786 int64_t last; 2787 int64_t n = 0; 2788 2789 do { 2790 last = n; 2791 blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block); 2792 table = DIV_ROUND_UP(blocks, blocks_per_table_cluster); 2793 n = clusters + blocks + table; 2794 2795 if (n == last && generous_increase) { 2796 clusters += DIV_ROUND_UP(table, 2); 2797 n = 0; /* force another loop */ 2798 generous_increase = false; 2799 } 2800 } while (n != last); 2801 2802 if (refblock_count) { 2803 *refblock_count = blocks; 2804 } 2805 2806 return (blocks + table) * cluster_size; 2807 } 2808 2809 /** 2810 * qcow2_calc_prealloc_size: 2811 * @total_size: virtual disk size in bytes 2812 * @cluster_size: cluster size in bytes 2813 * @refcount_order: refcount bits power-of-2 exponent 2814 * 2815 * Returns: Total number of bytes required for the fully allocated image 2816 * (including metadata). 
2817 */ 2818 static int64_t qcow2_calc_prealloc_size(int64_t total_size, 2819 size_t cluster_size, 2820 int refcount_order) 2821 { 2822 int64_t meta_size = 0; 2823 uint64_t nl1e, nl2e; 2824 int64_t aligned_total_size = ROUND_UP(total_size, cluster_size); 2825 2826 /* header: 1 cluster */ 2827 meta_size += cluster_size; 2828 2829 /* total size of L2 tables */ 2830 nl2e = aligned_total_size / cluster_size; 2831 nl2e = ROUND_UP(nl2e, cluster_size / sizeof(uint64_t)); 2832 meta_size += nl2e * sizeof(uint64_t); 2833 2834 /* total size of L1 tables */ 2835 nl1e = nl2e * sizeof(uint64_t) / cluster_size; 2836 nl1e = ROUND_UP(nl1e, cluster_size / sizeof(uint64_t)); 2837 meta_size += nl1e * sizeof(uint64_t); 2838 2839 /* total size of refcount table and blocks */ 2840 meta_size += qcow2_refcount_metadata_size( 2841 (meta_size + aligned_total_size) / cluster_size, 2842 cluster_size, refcount_order, false, NULL); 2843 2844 return meta_size + aligned_total_size; 2845 } 2846 2847 static bool validate_cluster_size(size_t cluster_size, Error **errp) 2848 { 2849 int cluster_bits = ctz32(cluster_size); 2850 if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || 2851 (1 << cluster_bits) != cluster_size) 2852 { 2853 error_setg(errp, "Cluster size must be a power of two between %d and " 2854 "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); 2855 return false; 2856 } 2857 return true; 2858 } 2859 2860 static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp) 2861 { 2862 size_t cluster_size; 2863 2864 cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 2865 DEFAULT_CLUSTER_SIZE); 2866 if (!validate_cluster_size(cluster_size, errp)) { 2867 return 0; 2868 } 2869 return cluster_size; 2870 } 2871 2872 static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp) 2873 { 2874 char *buf; 2875 int ret; 2876 2877 buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL); 2878 if (!buf) { 2879 ret = 3; /* default */ 2880 } else if 
(!strcmp(buf, "0.10")) { 2881 ret = 2; 2882 } else if (!strcmp(buf, "1.1")) { 2883 ret = 3; 2884 } else { 2885 error_setg(errp, "Invalid compatibility level: '%s'", buf); 2886 ret = -EINVAL; 2887 } 2888 g_free(buf); 2889 return ret; 2890 } 2891 2892 static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version, 2893 Error **errp) 2894 { 2895 uint64_t refcount_bits; 2896 2897 refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16); 2898 if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) { 2899 error_setg(errp, "Refcount width must be a power of two and may not " 2900 "exceed 64 bits"); 2901 return 0; 2902 } 2903 2904 if (version < 3 && refcount_bits != 16) { 2905 error_setg(errp, "Different refcount widths than 16 bits require " 2906 "compatibility level 1.1 or above (use compat=1.1 or " 2907 "greater)"); 2908 return 0; 2909 } 2910 2911 return refcount_bits; 2912 } 2913 2914 static int coroutine_fn 2915 qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) 2916 { 2917 BlockdevCreateOptionsQcow2 *qcow2_opts; 2918 QDict *options; 2919 2920 /* 2921 * Open the image file and write a minimal qcow2 header. 2922 * 2923 * We keep things simple and start with a zero-sized image. We also 2924 * do without refcount blocks or a L1 table for now. We'll fix the 2925 * inconsistency later. 2926 * 2927 * We do need a refcount table because growing the refcount table means 2928 * allocating two new refcount blocks - the seconds of which would be at 2929 * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file 2930 * size for any qcow2 image. 
2931 */ 2932 BlockBackend *blk = NULL; 2933 BlockDriverState *bs = NULL; 2934 BlockDriverState *data_bs = NULL; 2935 QCowHeader *header; 2936 size_t cluster_size; 2937 int version; 2938 int refcount_order; 2939 uint64_t* refcount_table; 2940 Error *local_err = NULL; 2941 int ret; 2942 2943 assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2); 2944 qcow2_opts = &create_options->u.qcow2; 2945 2946 bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp); 2947 if (bs == NULL) { 2948 return -EIO; 2949 } 2950 2951 /* Validate options and set default values */ 2952 if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) { 2953 error_setg(errp, "Image size must be a multiple of 512 bytes"); 2954 ret = -EINVAL; 2955 goto out; 2956 } 2957 2958 if (qcow2_opts->has_version) { 2959 switch (qcow2_opts->version) { 2960 case BLOCKDEV_QCOW2_VERSION_V2: 2961 version = 2; 2962 break; 2963 case BLOCKDEV_QCOW2_VERSION_V3: 2964 version = 3; 2965 break; 2966 default: 2967 g_assert_not_reached(); 2968 } 2969 } else { 2970 version = 3; 2971 } 2972 2973 if (qcow2_opts->has_cluster_size) { 2974 cluster_size = qcow2_opts->cluster_size; 2975 } else { 2976 cluster_size = DEFAULT_CLUSTER_SIZE; 2977 } 2978 2979 if (!validate_cluster_size(cluster_size, errp)) { 2980 ret = -EINVAL; 2981 goto out; 2982 } 2983 2984 if (!qcow2_opts->has_preallocation) { 2985 qcow2_opts->preallocation = PREALLOC_MODE_OFF; 2986 } 2987 if (qcow2_opts->has_backing_file && 2988 qcow2_opts->preallocation != PREALLOC_MODE_OFF) 2989 { 2990 error_setg(errp, "Backing file and preallocation cannot be used at " 2991 "the same time"); 2992 ret = -EINVAL; 2993 goto out; 2994 } 2995 if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) { 2996 error_setg(errp, "Backing format cannot be used without backing file"); 2997 ret = -EINVAL; 2998 goto out; 2999 } 3000 3001 if (!qcow2_opts->has_lazy_refcounts) { 3002 qcow2_opts->lazy_refcounts = false; 3003 } 3004 if (version < 3 && qcow2_opts->lazy_refcounts) { 3005 
error_setg(errp, "Lazy refcounts only supported with compatibility " 3006 "level 1.1 and above (use version=v3 or greater)"); 3007 ret = -EINVAL; 3008 goto out; 3009 } 3010 3011 if (!qcow2_opts->has_refcount_bits) { 3012 qcow2_opts->refcount_bits = 16; 3013 } 3014 if (qcow2_opts->refcount_bits > 64 || 3015 !is_power_of_2(qcow2_opts->refcount_bits)) 3016 { 3017 error_setg(errp, "Refcount width must be a power of two and may not " 3018 "exceed 64 bits"); 3019 ret = -EINVAL; 3020 goto out; 3021 } 3022 if (version < 3 && qcow2_opts->refcount_bits != 16) { 3023 error_setg(errp, "Different refcount widths than 16 bits require " 3024 "compatibility level 1.1 or above (use version=v3 or " 3025 "greater)"); 3026 ret = -EINVAL; 3027 goto out; 3028 } 3029 refcount_order = ctz32(qcow2_opts->refcount_bits); 3030 3031 if (qcow2_opts->data_file_raw && !qcow2_opts->data_file) { 3032 error_setg(errp, "data-file-raw requires data-file"); 3033 ret = -EINVAL; 3034 goto out; 3035 } 3036 if (qcow2_opts->data_file_raw && qcow2_opts->has_backing_file) { 3037 error_setg(errp, "Backing file and data-file-raw cannot be used at " 3038 "the same time"); 3039 ret = -EINVAL; 3040 goto out; 3041 } 3042 3043 if (qcow2_opts->data_file) { 3044 if (version < 3) { 3045 error_setg(errp, "External data files are only supported with " 3046 "compatibility level 1.1 and above (use version=v3 or " 3047 "greater)"); 3048 ret = -EINVAL; 3049 goto out; 3050 } 3051 data_bs = bdrv_open_blockdev_ref(qcow2_opts->data_file, errp); 3052 if (bs == NULL) { 3053 ret = -EIO; 3054 goto out; 3055 } 3056 } 3057 3058 /* Create BlockBackend to write to the image */ 3059 blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); 3060 ret = blk_insert_bs(blk, bs, errp); 3061 if (ret < 0) { 3062 goto out; 3063 } 3064 blk_set_allow_write_beyond_eof(blk, true); 3065 3066 /* Clear the protocol layer and preallocate it if necessary */ 3067 ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp); 3068 if (ret < 0) { 3069 goto out; 
3070 } 3071 3072 /* Write the header */ 3073 QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); 3074 header = g_malloc0(cluster_size); 3075 *header = (QCowHeader) { 3076 .magic = cpu_to_be32(QCOW_MAGIC), 3077 .version = cpu_to_be32(version), 3078 .cluster_bits = cpu_to_be32(ctz32(cluster_size)), 3079 .size = cpu_to_be64(0), 3080 .l1_table_offset = cpu_to_be64(0), 3081 .l1_size = cpu_to_be32(0), 3082 .refcount_table_offset = cpu_to_be64(cluster_size), 3083 .refcount_table_clusters = cpu_to_be32(1), 3084 .refcount_order = cpu_to_be32(refcount_order), 3085 .header_length = cpu_to_be32(sizeof(*header)), 3086 }; 3087 3088 /* We'll update this to correct value later */ 3089 header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 3090 3091 if (qcow2_opts->lazy_refcounts) { 3092 header->compatible_features |= 3093 cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); 3094 } 3095 if (data_bs) { 3096 header->incompatible_features |= 3097 cpu_to_be64(QCOW2_INCOMPAT_DATA_FILE); 3098 } 3099 if (qcow2_opts->data_file_raw) { 3100 header->autoclear_features |= 3101 cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW); 3102 } 3103 3104 ret = blk_pwrite(blk, 0, header, cluster_size, 0); 3105 g_free(header); 3106 if (ret < 0) { 3107 error_setg_errno(errp, -ret, "Could not write qcow2 header"); 3108 goto out; 3109 } 3110 3111 /* Write a refcount table with one refcount block */ 3112 refcount_table = g_malloc0(2 * cluster_size); 3113 refcount_table[0] = cpu_to_be64(2 * cluster_size); 3114 ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0); 3115 g_free(refcount_table); 3116 3117 if (ret < 0) { 3118 error_setg_errno(errp, -ret, "Could not write refcount table"); 3119 goto out; 3120 } 3121 3122 blk_unref(blk); 3123 blk = NULL; 3124 3125 /* 3126 * And now open the image and make it consistent first (i.e. 
increase the 3127 * refcount of the cluster that is occupied by the header and the refcount 3128 * table) 3129 */ 3130 options = qdict_new(); 3131 qdict_put_str(options, "driver", "qcow2"); 3132 qdict_put_str(options, "file", bs->node_name); 3133 if (data_bs) { 3134 qdict_put_str(options, "data-file", data_bs->node_name); 3135 } 3136 blk = blk_new_open(NULL, NULL, options, 3137 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, 3138 &local_err); 3139 if (blk == NULL) { 3140 error_propagate(errp, local_err); 3141 ret = -EIO; 3142 goto out; 3143 } 3144 3145 ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); 3146 if (ret < 0) { 3147 error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " 3148 "header and refcount table"); 3149 goto out; 3150 3151 } else if (ret != 0) { 3152 error_report("Huh, first cluster in empty image is already in use?"); 3153 abort(); 3154 } 3155 3156 /* Set the external data file if necessary */ 3157 if (data_bs) { 3158 BDRVQcow2State *s = blk_bs(blk)->opaque; 3159 s->image_data_file = g_strdup(data_bs->filename); 3160 } 3161 3162 /* Create a full header (including things like feature table) */ 3163 ret = qcow2_update_header(blk_bs(blk)); 3164 if (ret < 0) { 3165 error_setg_errno(errp, -ret, "Could not update qcow2 header"); 3166 goto out; 3167 } 3168 3169 /* Okay, now that we have a valid image, let's give it the right size */ 3170 ret = blk_truncate(blk, qcow2_opts->size, qcow2_opts->preallocation, errp); 3171 if (ret < 0) { 3172 error_prepend(errp, "Could not resize image: "); 3173 goto out; 3174 } 3175 3176 /* Want a backing file? 
There you go.*/ 3177 if (qcow2_opts->has_backing_file) { 3178 const char *backing_format = NULL; 3179 3180 if (qcow2_opts->has_backing_fmt) { 3181 backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt); 3182 } 3183 3184 ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file, 3185 backing_format); 3186 if (ret < 0) { 3187 error_setg_errno(errp, -ret, "Could not assign backing file '%s' " 3188 "with format '%s'", qcow2_opts->backing_file, 3189 backing_format); 3190 goto out; 3191 } 3192 } 3193 3194 /* Want encryption? There you go. */ 3195 if (qcow2_opts->has_encrypt) { 3196 ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp); 3197 if (ret < 0) { 3198 goto out; 3199 } 3200 } 3201 3202 blk_unref(blk); 3203 blk = NULL; 3204 3205 /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. 3206 * Using BDRV_O_NO_IO, since encryption is now setup we don't want to 3207 * have to setup decryption context. We're not doing any I/O on the top 3208 * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does 3209 * not have effect. 
3210 */ 3211 options = qdict_new(); 3212 qdict_put_str(options, "driver", "qcow2"); 3213 qdict_put_str(options, "file", bs->node_name); 3214 if (data_bs) { 3215 qdict_put_str(options, "data-file", data_bs->node_name); 3216 } 3217 blk = blk_new_open(NULL, NULL, options, 3218 BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO, 3219 &local_err); 3220 if (blk == NULL) { 3221 error_propagate(errp, local_err); 3222 ret = -EIO; 3223 goto out; 3224 } 3225 3226 ret = 0; 3227 out: 3228 blk_unref(blk); 3229 bdrv_unref(bs); 3230 bdrv_unref(data_bs); 3231 return ret; 3232 } 3233 3234 static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts, 3235 Error **errp) 3236 { 3237 BlockdevCreateOptions *create_options = NULL; 3238 QDict *qdict; 3239 Visitor *v; 3240 BlockDriverState *bs = NULL; 3241 BlockDriverState *data_bs = NULL; 3242 Error *local_err = NULL; 3243 const char *val; 3244 int ret; 3245 3246 /* Only the keyval visitor supports the dotted syntax needed for 3247 * encryption, so go through a QDict before getting a QAPI type. Ignore 3248 * options meant for the protocol layer so that the visitor doesn't 3249 * complain. */ 3250 qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts, 3251 true); 3252 3253 /* Handle encryption options */ 3254 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT); 3255 if (val && !strcmp(val, "on")) { 3256 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow"); 3257 } else if (val && !strcmp(val, "off")) { 3258 qdict_del(qdict, BLOCK_OPT_ENCRYPT); 3259 } 3260 3261 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT); 3262 if (val && !strcmp(val, "aes")) { 3263 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow"); 3264 } 3265 3266 /* Convert compat=0.10/1.1 into compat=v2/v3, to be renamed into 3267 * version=v2/v3 below. 
*/ 3268 val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL); 3269 if (val && !strcmp(val, "0.10")) { 3270 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2"); 3271 } else if (val && !strcmp(val, "1.1")) { 3272 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3"); 3273 } 3274 3275 /* Change legacy command line options into QMP ones */ 3276 static const QDictRenames opt_renames[] = { 3277 { BLOCK_OPT_BACKING_FILE, "backing-file" }, 3278 { BLOCK_OPT_BACKING_FMT, "backing-fmt" }, 3279 { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" }, 3280 { BLOCK_OPT_LAZY_REFCOUNTS, "lazy-refcounts" }, 3281 { BLOCK_OPT_REFCOUNT_BITS, "refcount-bits" }, 3282 { BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT }, 3283 { BLOCK_OPT_COMPAT_LEVEL, "version" }, 3284 { BLOCK_OPT_DATA_FILE_RAW, "data-file-raw" }, 3285 { NULL, NULL }, 3286 }; 3287 3288 if (!qdict_rename_keys(qdict, opt_renames, errp)) { 3289 ret = -EINVAL; 3290 goto finish; 3291 } 3292 3293 /* Create and open the file (protocol layer) */ 3294 ret = bdrv_create_file(filename, opts, errp); 3295 if (ret < 0) { 3296 goto finish; 3297 } 3298 3299 bs = bdrv_open(filename, NULL, NULL, 3300 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); 3301 if (bs == NULL) { 3302 ret = -EIO; 3303 goto finish; 3304 } 3305 3306 /* Create and open an external data file (protocol layer) */ 3307 val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE); 3308 if (val) { 3309 ret = bdrv_create_file(val, opts, errp); 3310 if (ret < 0) { 3311 goto finish; 3312 } 3313 3314 data_bs = bdrv_open(val, NULL, NULL, 3315 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, 3316 errp); 3317 if (data_bs == NULL) { 3318 ret = -EIO; 3319 goto finish; 3320 } 3321 3322 qdict_del(qdict, BLOCK_OPT_DATA_FILE); 3323 qdict_put_str(qdict, "data-file", data_bs->node_name); 3324 } 3325 3326 /* Set 'driver' and 'node' options */ 3327 qdict_put_str(qdict, "driver", "qcow2"); 3328 qdict_put_str(qdict, "file", bs->node_name); 3329 3330 /* Now get the QAPI type BlockdevCreateOptions */ 3331 v = 
qobject_input_visitor_new_flat_confused(qdict, errp); 3332 if (!v) { 3333 ret = -EINVAL; 3334 goto finish; 3335 } 3336 3337 visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err); 3338 visit_free(v); 3339 3340 if (local_err) { 3341 error_propagate(errp, local_err); 3342 ret = -EINVAL; 3343 goto finish; 3344 } 3345 3346 /* Silently round up size */ 3347 create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size, 3348 BDRV_SECTOR_SIZE); 3349 3350 /* Create the qcow2 image (format layer) */ 3351 ret = qcow2_co_create(create_options, errp); 3352 if (ret < 0) { 3353 goto finish; 3354 } 3355 3356 ret = 0; 3357 finish: 3358 qobject_unref(qdict); 3359 bdrv_unref(bs); 3360 bdrv_unref(data_bs); 3361 qapi_free_BlockdevCreateOptions(create_options); 3362 return ret; 3363 } 3364 3365 3366 static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) 3367 { 3368 int64_t nr; 3369 int res; 3370 3371 /* Clamp to image length, before checking status of underlying sectors */ 3372 if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { 3373 bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset; 3374 } 3375 3376 if (!bytes) { 3377 return true; 3378 } 3379 res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL); 3380 return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes; 3381 } 3382 3383 static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, 3384 int64_t offset, int bytes, BdrvRequestFlags flags) 3385 { 3386 int ret; 3387 BDRVQcow2State *s = bs->opaque; 3388 3389 uint32_t head = offset % s->cluster_size; 3390 uint32_t tail = (offset + bytes) % s->cluster_size; 3391 3392 trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes); 3393 if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) { 3394 tail = 0; 3395 } 3396 3397 if (head || tail) { 3398 uint64_t off; 3399 unsigned int nr; 3400 3401 assert(head + bytes <= s->cluster_size); 3402 3403 /* check whether remainder of cluster already reads 
as zero */ 3404 if (!(is_zero(bs, offset - head, head) && 3405 is_zero(bs, offset + bytes, 3406 tail ? s->cluster_size - tail : 0))) { 3407 return -ENOTSUP; 3408 } 3409 3410 qemu_co_mutex_lock(&s->lock); 3411 /* We can have new write after previous check */ 3412 offset = QEMU_ALIGN_DOWN(offset, s->cluster_size); 3413 bytes = s->cluster_size; 3414 nr = s->cluster_size; 3415 ret = qcow2_get_cluster_offset(bs, offset, &nr, &off); 3416 if (ret != QCOW2_CLUSTER_UNALLOCATED && 3417 ret != QCOW2_CLUSTER_ZERO_PLAIN && 3418 ret != QCOW2_CLUSTER_ZERO_ALLOC) { 3419 qemu_co_mutex_unlock(&s->lock); 3420 return -ENOTSUP; 3421 } 3422 } else { 3423 qemu_co_mutex_lock(&s->lock); 3424 } 3425 3426 trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes); 3427 3428 /* Whatever is left can use real zero clusters */ 3429 ret = qcow2_cluster_zeroize(bs, offset, bytes, flags); 3430 qemu_co_mutex_unlock(&s->lock); 3431 3432 return ret; 3433 } 3434 3435 static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, 3436 int64_t offset, int bytes) 3437 { 3438 int ret; 3439 BDRVQcow2State *s = bs->opaque; 3440 3441 if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) { 3442 assert(bytes < s->cluster_size); 3443 /* Ignore partial clusters, except for the special case of the 3444 * complete partial cluster at the end of an unaligned file */ 3445 if (!QEMU_IS_ALIGNED(offset, s->cluster_size) || 3446 offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) { 3447 return -ENOTSUP; 3448 } 3449 } 3450 3451 qemu_co_mutex_lock(&s->lock); 3452 ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST, 3453 false); 3454 qemu_co_mutex_unlock(&s->lock); 3455 return ret; 3456 } 3457 3458 static int coroutine_fn 3459 qcow2_co_copy_range_from(BlockDriverState *bs, 3460 BdrvChild *src, uint64_t src_offset, 3461 BdrvChild *dst, uint64_t dst_offset, 3462 uint64_t bytes, BdrvRequestFlags read_flags, 3463 BdrvRequestFlags write_flags) 3464 { 3465 BDRVQcow2State *s = bs->opaque; 3466 int ret; 
3467 unsigned int cur_bytes; /* number of bytes in current iteration */ 3468 BdrvChild *child = NULL; 3469 BdrvRequestFlags cur_write_flags; 3470 3471 assert(!bs->encrypted); 3472 qemu_co_mutex_lock(&s->lock); 3473 3474 while (bytes != 0) { 3475 uint64_t copy_offset = 0; 3476 /* prepare next request */ 3477 cur_bytes = MIN(bytes, INT_MAX); 3478 cur_write_flags = write_flags; 3479 3480 ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, ©_offset); 3481 if (ret < 0) { 3482 goto out; 3483 } 3484 3485 switch (ret) { 3486 case QCOW2_CLUSTER_UNALLOCATED: 3487 if (bs->backing && bs->backing->bs) { 3488 int64_t backing_length = bdrv_getlength(bs->backing->bs); 3489 if (src_offset >= backing_length) { 3490 cur_write_flags |= BDRV_REQ_ZERO_WRITE; 3491 } else { 3492 child = bs->backing; 3493 cur_bytes = MIN(cur_bytes, backing_length - src_offset); 3494 copy_offset = src_offset; 3495 } 3496 } else { 3497 cur_write_flags |= BDRV_REQ_ZERO_WRITE; 3498 } 3499 break; 3500 3501 case QCOW2_CLUSTER_ZERO_PLAIN: 3502 case QCOW2_CLUSTER_ZERO_ALLOC: 3503 cur_write_flags |= BDRV_REQ_ZERO_WRITE; 3504 break; 3505 3506 case QCOW2_CLUSTER_COMPRESSED: 3507 ret = -ENOTSUP; 3508 goto out; 3509 3510 case QCOW2_CLUSTER_NORMAL: 3511 child = s->data_file; 3512 copy_offset += offset_into_cluster(s, src_offset); 3513 if ((copy_offset & 511) != 0) { 3514 ret = -EIO; 3515 goto out; 3516 } 3517 break; 3518 3519 default: 3520 abort(); 3521 } 3522 qemu_co_mutex_unlock(&s->lock); 3523 ret = bdrv_co_copy_range_from(child, 3524 copy_offset, 3525 dst, dst_offset, 3526 cur_bytes, read_flags, cur_write_flags); 3527 qemu_co_mutex_lock(&s->lock); 3528 if (ret < 0) { 3529 goto out; 3530 } 3531 3532 bytes -= cur_bytes; 3533 src_offset += cur_bytes; 3534 dst_offset += cur_bytes; 3535 } 3536 ret = 0; 3537 3538 out: 3539 qemu_co_mutex_unlock(&s->lock); 3540 return ret; 3541 } 3542 3543 static int coroutine_fn 3544 qcow2_co_copy_range_to(BlockDriverState *bs, 3545 BdrvChild *src, uint64_t src_offset, 3546 
BdrvChild *dst, uint64_t dst_offset, 3547 uint64_t bytes, BdrvRequestFlags read_flags, 3548 BdrvRequestFlags write_flags) 3549 { 3550 BDRVQcow2State *s = bs->opaque; 3551 int offset_in_cluster; 3552 int ret; 3553 unsigned int cur_bytes; /* number of sectors in current iteration */ 3554 uint64_t cluster_offset; 3555 QCowL2Meta *l2meta = NULL; 3556 3557 assert(!bs->encrypted); 3558 3559 qemu_co_mutex_lock(&s->lock); 3560 3561 while (bytes != 0) { 3562 3563 l2meta = NULL; 3564 3565 offset_in_cluster = offset_into_cluster(s, dst_offset); 3566 cur_bytes = MIN(bytes, INT_MAX); 3567 3568 /* TODO: 3569 * If src->bs == dst->bs, we could simply copy by incrementing 3570 * the refcnt, without copying user data. 3571 * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */ 3572 ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes, 3573 &cluster_offset, &l2meta); 3574 if (ret < 0) { 3575 goto fail; 3576 } 3577 3578 assert((cluster_offset & 511) == 0); 3579 3580 ret = qcow2_pre_write_overlap_check(bs, 0, 3581 cluster_offset + offset_in_cluster, cur_bytes, true); 3582 if (ret < 0) { 3583 goto fail; 3584 } 3585 3586 qemu_co_mutex_unlock(&s->lock); 3587 ret = bdrv_co_copy_range_to(src, src_offset, 3588 s->data_file, 3589 cluster_offset + offset_in_cluster, 3590 cur_bytes, read_flags, write_flags); 3591 qemu_co_mutex_lock(&s->lock); 3592 if (ret < 0) { 3593 goto fail; 3594 } 3595 3596 ret = qcow2_handle_l2meta(bs, &l2meta, true); 3597 if (ret) { 3598 goto fail; 3599 } 3600 3601 bytes -= cur_bytes; 3602 src_offset += cur_bytes; 3603 dst_offset += cur_bytes; 3604 } 3605 ret = 0; 3606 3607 fail: 3608 qcow2_handle_l2meta(bs, &l2meta, false); 3609 3610 qemu_co_mutex_unlock(&s->lock); 3611 3612 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); 3613 3614 return ret; 3615 } 3616 3617 static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, 3618 PreallocMode prealloc, Error **errp) 3619 { 3620 BDRVQcow2State *s = bs->opaque; 3621 uint64_t 
old_length; 3622 int64_t new_l1_size; 3623 int ret; 3624 QDict *options; 3625 3626 if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA && 3627 prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL) 3628 { 3629 error_setg(errp, "Unsupported preallocation mode '%s'", 3630 PreallocMode_str(prealloc)); 3631 return -ENOTSUP; 3632 } 3633 3634 if (offset & 511) { 3635 error_setg(errp, "The new size must be a multiple of 512"); 3636 return -EINVAL; 3637 } 3638 3639 qemu_co_mutex_lock(&s->lock); 3640 3641 /* cannot proceed if image has snapshots */ 3642 if (s->nb_snapshots) { 3643 error_setg(errp, "Can't resize an image which has snapshots"); 3644 ret = -ENOTSUP; 3645 goto fail; 3646 } 3647 3648 /* cannot proceed if image has bitmaps */ 3649 if (s->nb_bitmaps) { 3650 /* TODO: resize bitmaps in the image */ 3651 error_setg(errp, "Can't resize an image which has bitmaps"); 3652 ret = -ENOTSUP; 3653 goto fail; 3654 } 3655 3656 old_length = bs->total_sectors * BDRV_SECTOR_SIZE; 3657 new_l1_size = size_to_l1(s, offset); 3658 3659 if (offset < old_length) { 3660 int64_t last_cluster, old_file_size; 3661 if (prealloc != PREALLOC_MODE_OFF) { 3662 error_setg(errp, 3663 "Preallocation can't be used for shrinking an image"); 3664 ret = -EINVAL; 3665 goto fail; 3666 } 3667 3668 ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size), 3669 old_length - ROUND_UP(offset, 3670 s->cluster_size), 3671 QCOW2_DISCARD_ALWAYS, true); 3672 if (ret < 0) { 3673 error_setg_errno(errp, -ret, "Failed to discard cropped clusters"); 3674 goto fail; 3675 } 3676 3677 ret = qcow2_shrink_l1_table(bs, new_l1_size); 3678 if (ret < 0) { 3679 error_setg_errno(errp, -ret, 3680 "Failed to reduce the number of L2 tables"); 3681 goto fail; 3682 } 3683 3684 ret = qcow2_shrink_reftable(bs); 3685 if (ret < 0) { 3686 error_setg_errno(errp, -ret, 3687 "Failed to discard unused refblocks"); 3688 goto fail; 3689 } 3690 3691 old_file_size = bdrv_getlength(bs->file->bs); 3692 if 
(old_file_size < 0) { 3693 error_setg_errno(errp, -old_file_size, 3694 "Failed to inquire current file length"); 3695 ret = old_file_size; 3696 goto fail; 3697 } 3698 last_cluster = qcow2_get_last_cluster(bs, old_file_size); 3699 if (last_cluster < 0) { 3700 error_setg_errno(errp, -last_cluster, 3701 "Failed to find the last cluster"); 3702 ret = last_cluster; 3703 goto fail; 3704 } 3705 if ((last_cluster + 1) * s->cluster_size < old_file_size) { 3706 Error *local_err = NULL; 3707 3708 bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, 3709 PREALLOC_MODE_OFF, &local_err); 3710 if (local_err) { 3711 warn_reportf_err(local_err, 3712 "Failed to truncate the tail of the image: "); 3713 } 3714 } 3715 } else { 3716 ret = qcow2_grow_l1_table(bs, new_l1_size, true); 3717 if (ret < 0) { 3718 error_setg_errno(errp, -ret, "Failed to grow the L1 table"); 3719 goto fail; 3720 } 3721 } 3722 3723 switch (prealloc) { 3724 case PREALLOC_MODE_OFF: 3725 break; 3726 3727 case PREALLOC_MODE_METADATA: 3728 ret = preallocate_co(bs, old_length, offset); 3729 if (ret < 0) { 3730 error_setg_errno(errp, -ret, "Preallocation failed"); 3731 goto fail; 3732 } 3733 break; 3734 3735 case PREALLOC_MODE_FALLOC: 3736 case PREALLOC_MODE_FULL: 3737 { 3738 int64_t allocation_start, host_offset, guest_offset; 3739 int64_t clusters_allocated; 3740 int64_t old_file_size, new_file_size; 3741 uint64_t nb_new_data_clusters, nb_new_l2_tables; 3742 3743 /* With a data file, preallocation means just allocating the metadata 3744 * and forwarding the truncate request to the data file */ 3745 if (has_data_file(bs)) { 3746 ret = preallocate_co(bs, old_length, offset); 3747 if (ret < 0) { 3748 error_setg_errno(errp, -ret, "Preallocation failed"); 3749 goto fail; 3750 } 3751 break; 3752 } 3753 3754 old_file_size = bdrv_getlength(bs->file->bs); 3755 if (old_file_size < 0) { 3756 error_setg_errno(errp, -old_file_size, 3757 "Failed to inquire current file length"); 3758 ret = old_file_size; 3759 goto fail; 
3760 } 3761 old_file_size = ROUND_UP(old_file_size, s->cluster_size); 3762 3763 nb_new_data_clusters = DIV_ROUND_UP(offset - old_length, 3764 s->cluster_size); 3765 3766 /* This is an overestimation; we will not actually allocate space for 3767 * these in the file but just make sure the new refcount structures are 3768 * able to cover them so we will not have to allocate new refblocks 3769 * while entering the data blocks in the potentially new L2 tables. 3770 * (We do not actually care where the L2 tables are placed. Maybe they 3771 * are already allocated or they can be placed somewhere before 3772 * @old_file_size. It does not matter because they will be fully 3773 * allocated automatically, so they do not need to be covered by the 3774 * preallocation. All that matters is that we will not have to allocate 3775 * new refcount structures for them.) */ 3776 nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters, 3777 s->cluster_size / sizeof(uint64_t)); 3778 /* The cluster range may not be aligned to L2 boundaries, so add one L2 3779 * table for a potential head/tail */ 3780 nb_new_l2_tables++; 3781 3782 allocation_start = qcow2_refcount_area(bs, old_file_size, 3783 nb_new_data_clusters + 3784 nb_new_l2_tables, 3785 true, 0, 0); 3786 if (allocation_start < 0) { 3787 error_setg_errno(errp, -allocation_start, 3788 "Failed to resize refcount structures"); 3789 ret = allocation_start; 3790 goto fail; 3791 } 3792 3793 clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start, 3794 nb_new_data_clusters); 3795 if (clusters_allocated < 0) { 3796 error_setg_errno(errp, -clusters_allocated, 3797 "Failed to allocate data clusters"); 3798 ret = clusters_allocated; 3799 goto fail; 3800 } 3801 3802 assert(clusters_allocated == nb_new_data_clusters); 3803 3804 /* Allocate the data area */ 3805 new_file_size = allocation_start + 3806 nb_new_data_clusters * s->cluster_size; 3807 ret = bdrv_co_truncate(bs->file, new_file_size, prealloc, errp); 3808 if (ret < 0) { 3809 
error_prepend(errp, "Failed to resize underlying file: "); 3810 qcow2_free_clusters(bs, allocation_start, 3811 nb_new_data_clusters * s->cluster_size, 3812 QCOW2_DISCARD_OTHER); 3813 goto fail; 3814 } 3815 3816 /* Create the necessary L2 entries */ 3817 host_offset = allocation_start; 3818 guest_offset = old_length; 3819 while (nb_new_data_clusters) { 3820 int64_t nb_clusters = MIN( 3821 nb_new_data_clusters, 3822 s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset)); 3823 QCowL2Meta allocation = { 3824 .offset = guest_offset, 3825 .alloc_offset = host_offset, 3826 .nb_clusters = nb_clusters, 3827 }; 3828 qemu_co_queue_init(&allocation.dependent_requests); 3829 3830 ret = qcow2_alloc_cluster_link_l2(bs, &allocation); 3831 if (ret < 0) { 3832 error_setg_errno(errp, -ret, "Failed to update L2 tables"); 3833 qcow2_free_clusters(bs, host_offset, 3834 nb_new_data_clusters * s->cluster_size, 3835 QCOW2_DISCARD_OTHER); 3836 goto fail; 3837 } 3838 3839 guest_offset += nb_clusters * s->cluster_size; 3840 host_offset += nb_clusters * s->cluster_size; 3841 nb_new_data_clusters -= nb_clusters; 3842 } 3843 break; 3844 } 3845 3846 default: 3847 g_assert_not_reached(); 3848 } 3849 3850 if (prealloc != PREALLOC_MODE_OFF) { 3851 /* Flush metadata before actually changing the image size */ 3852 ret = qcow2_write_caches(bs); 3853 if (ret < 0) { 3854 error_setg_errno(errp, -ret, 3855 "Failed to flush the preallocated area to disk"); 3856 goto fail; 3857 } 3858 } 3859 3860 bs->total_sectors = offset / BDRV_SECTOR_SIZE; 3861 3862 if (has_data_file(bs)) { 3863 if (prealloc == PREALLOC_MODE_METADATA) { 3864 prealloc = PREALLOC_MODE_OFF; 3865 } 3866 ret = bdrv_co_truncate(s->data_file, offset, prealloc, errp); 3867 if (ret < 0) { 3868 goto fail; 3869 } 3870 } 3871 3872 /* write updated header.size */ 3873 offset = cpu_to_be64(offset); 3874 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), 3875 &offset, sizeof(uint64_t)); 3876 if (ret < 0) { 3877 error_setg_errno(errp, 
-ret, "Failed to update the image size"); 3878 goto fail; 3879 } 3880 3881 s->l1_vm_state_index = new_l1_size; 3882 3883 /* Update cache sizes */ 3884 options = qdict_clone_shallow(bs->options); 3885 ret = qcow2_update_options(bs, options, s->flags, errp); 3886 qobject_unref(options); 3887 if (ret < 0) { 3888 goto fail; 3889 } 3890 ret = 0; 3891 fail: 3892 qemu_co_mutex_unlock(&s->lock); 3893 return ret; 3894 } 3895 3896 /* 3897 * qcow2_compress() 3898 * 3899 * @dest - destination buffer, @dest_size bytes 3900 * @src - source buffer, @src_size bytes 3901 * 3902 * Returns: compressed size on success 3903 * -1 destination buffer is not enough to store compressed data 3904 * -2 on any other error 3905 */ 3906 static ssize_t qcow2_compress(void *dest, size_t dest_size, 3907 const void *src, size_t src_size) 3908 { 3909 ssize_t ret; 3910 z_stream strm; 3911 3912 /* best compression, small window, no zlib header */ 3913 memset(&strm, 0, sizeof(strm)); 3914 ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 3915 -12, 9, Z_DEFAULT_STRATEGY); 3916 if (ret != Z_OK) { 3917 return -2; 3918 } 3919 3920 /* strm.next_in is not const in old zlib versions, such as those used on 3921 * OpenBSD/NetBSD, so cast the const away */ 3922 strm.avail_in = src_size; 3923 strm.next_in = (void *) src; 3924 strm.avail_out = dest_size; 3925 strm.next_out = dest; 3926 3927 ret = deflate(&strm, Z_FINISH); 3928 if (ret == Z_STREAM_END) { 3929 ret = dest_size - strm.avail_out; 3930 } else { 3931 ret = (ret == Z_OK ? -1 : -2); 3932 } 3933 3934 deflateEnd(&strm); 3935 3936 return ret; 3937 } 3938 3939 /* 3940 * qcow2_decompress() 3941 * 3942 * Decompress some data (not more than @src_size bytes) to produce exactly 3943 * @dest_size bytes. 
3944 * 3945 * @dest - destination buffer, @dest_size bytes 3946 * @src - source buffer, @src_size bytes 3947 * 3948 * Returns: 0 on success 3949 * -1 on fail 3950 */ 3951 static ssize_t qcow2_decompress(void *dest, size_t dest_size, 3952 const void *src, size_t src_size) 3953 { 3954 int ret = 0; 3955 z_stream strm; 3956 3957 memset(&strm, 0, sizeof(strm)); 3958 strm.avail_in = src_size; 3959 strm.next_in = (void *) src; 3960 strm.avail_out = dest_size; 3961 strm.next_out = dest; 3962 3963 ret = inflateInit2(&strm, -12); 3964 if (ret != Z_OK) { 3965 return -1; 3966 } 3967 3968 ret = inflate(&strm, Z_FINISH); 3969 if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) { 3970 /* We approve Z_BUF_ERROR because we need @dest buffer to be filled, but 3971 * @src buffer may be processed partly (because in qcow2 we know size of 3972 * compressed data with precision of one sector) */ 3973 ret = -1; 3974 } 3975 3976 inflateEnd(&strm); 3977 3978 return ret; 3979 } 3980 3981 #define MAX_COMPRESS_THREADS 4 3982 3983 typedef ssize_t (*Qcow2CompressFunc)(void *dest, size_t dest_size, 3984 const void *src, size_t src_size); 3985 typedef struct Qcow2CompressData { 3986 void *dest; 3987 size_t dest_size; 3988 const void *src; 3989 size_t src_size; 3990 ssize_t ret; 3991 3992 Qcow2CompressFunc func; 3993 } Qcow2CompressData; 3994 3995 static int qcow2_compress_pool_func(void *opaque) 3996 { 3997 Qcow2CompressData *data = opaque; 3998 3999 data->ret = data->func(data->dest, data->dest_size, 4000 data->src, data->src_size); 4001 4002 return 0; 4003 } 4004 4005 static void qcow2_compress_complete(void *opaque, int ret) 4006 { 4007 qemu_coroutine_enter(opaque); 4008 } 4009 4010 static ssize_t coroutine_fn 4011 qcow2_co_do_compress(BlockDriverState *bs, void *dest, size_t dest_size, 4012 const void *src, size_t src_size, Qcow2CompressFunc func) 4013 { 4014 BDRVQcow2State *s = bs->opaque; 4015 BlockAIOCB *acb; 4016 ThreadPool *pool = 
aio_get_thread_pool(bdrv_get_aio_context(bs)); 4017 Qcow2CompressData arg = { 4018 .dest = dest, 4019 .dest_size = dest_size, 4020 .src = src, 4021 .src_size = src_size, 4022 .func = func, 4023 }; 4024 4025 while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) { 4026 qemu_co_queue_wait(&s->compress_wait_queue, NULL); 4027 } 4028 4029 s->nb_compress_threads++; 4030 acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg, 4031 qcow2_compress_complete, 4032 qemu_coroutine_self()); 4033 4034 if (!acb) { 4035 s->nb_compress_threads--; 4036 return -EINVAL; 4037 } 4038 qemu_coroutine_yield(); 4039 s->nb_compress_threads--; 4040 qemu_co_queue_next(&s->compress_wait_queue); 4041 4042 return arg.ret; 4043 } 4044 4045 static ssize_t coroutine_fn 4046 qcow2_co_compress(BlockDriverState *bs, void *dest, size_t dest_size, 4047 const void *src, size_t src_size) 4048 { 4049 return qcow2_co_do_compress(bs, dest, dest_size, src, src_size, 4050 qcow2_compress); 4051 } 4052 4053 static ssize_t coroutine_fn 4054 qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size, 4055 const void *src, size_t src_size) 4056 { 4057 return qcow2_co_do_compress(bs, dest, dest_size, src, src_size, 4058 qcow2_decompress); 4059 } 4060 4061 /* XXX: put compressed sectors first, then all the cluster aligned 4062 tables to avoid losing bytes in alignment */ 4063 static coroutine_fn int 4064 qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, 4065 uint64_t bytes, QEMUIOVector *qiov) 4066 { 4067 BDRVQcow2State *s = bs->opaque; 4068 QEMUIOVector hd_qiov; 4069 int ret; 4070 size_t out_len; 4071 uint8_t *buf, *out_buf; 4072 uint64_t cluster_offset; 4073 4074 if (has_data_file(bs)) { 4075 return -ENOTSUP; 4076 } 4077 4078 if (bytes == 0) { 4079 /* align end of file to a sector boundary to ease reading with 4080 sector based I/Os */ 4081 int64_t len = bdrv_getlength(bs->file->bs); 4082 if (len < 0) { 4083 return len; 4084 } 4085 return bdrv_co_truncate(bs->file, len, 
PREALLOC_MODE_OFF, NULL); 4086 } 4087 4088 if (offset_into_cluster(s, offset)) { 4089 return -EINVAL; 4090 } 4091 4092 buf = qemu_blockalign(bs, s->cluster_size); 4093 if (bytes != s->cluster_size) { 4094 if (bytes > s->cluster_size || 4095 offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS) 4096 { 4097 qemu_vfree(buf); 4098 return -EINVAL; 4099 } 4100 /* Zero-pad last write if image size is not cluster aligned */ 4101 memset(buf + bytes, 0, s->cluster_size - bytes); 4102 } 4103 qemu_iovec_to_buf(qiov, 0, buf, bytes); 4104 4105 out_buf = g_malloc(s->cluster_size); 4106 4107 out_len = qcow2_co_compress(bs, out_buf, s->cluster_size - 1, 4108 buf, s->cluster_size); 4109 if (out_len == -2) { 4110 ret = -EINVAL; 4111 goto fail; 4112 } else if (out_len == -1) { 4113 /* could not compress: write normal cluster */ 4114 ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0); 4115 if (ret < 0) { 4116 goto fail; 4117 } 4118 goto success; 4119 } 4120 4121 qemu_co_mutex_lock(&s->lock); 4122 ret = qcow2_alloc_compressed_cluster_offset(bs, offset, out_len, 4123 &cluster_offset); 4124 if (ret < 0) { 4125 qemu_co_mutex_unlock(&s->lock); 4126 goto fail; 4127 } 4128 4129 ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true); 4130 qemu_co_mutex_unlock(&s->lock); 4131 if (ret < 0) { 4132 goto fail; 4133 } 4134 4135 qemu_iovec_init_buf(&hd_qiov, out_buf, out_len); 4136 4137 BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED); 4138 ret = bdrv_co_pwritev(s->data_file, cluster_offset, out_len, &hd_qiov, 0); 4139 if (ret < 0) { 4140 goto fail; 4141 } 4142 success: 4143 ret = 0; 4144 fail: 4145 qemu_vfree(buf); 4146 g_free(out_buf); 4147 return ret; 4148 } 4149 4150 static int coroutine_fn 4151 qcow2_co_preadv_compressed(BlockDriverState *bs, 4152 uint64_t file_cluster_offset, 4153 uint64_t offset, 4154 uint64_t bytes, 4155 QEMUIOVector *qiov) 4156 { 4157 BDRVQcow2State *s = bs->opaque; 4158 int ret = 0, csize, nb_csectors; 4159 uint64_t coffset; 4160 uint8_t *buf, 
/*
 * Empty the image by rebuilding a minimal metadata layout in place instead of
 * discarding clusters one by one.
 *
 * The resulting layout, in clusters from the start of bs->file, is:
 *   0               image header (left where it is)
 *   1               refcount table (one cluster)
 *   2               first refcount block
 *   3 ..            L1 table (l1_clusters clusters, all zero)
 * and the file is finally truncated right after the L1 table.
 *
 * The image is marked dirty for the duration of the rebuild and marked clean
 * again once the in-memory and on-disk refcounts agree.
 *
 * Returns: 0 on success, a negative errno on failure.  If the failure happens
 * after the on-disk refcount structures may have been touched, bs->drv is set
 * to NULL because the in-memory state can no longer be trusted.
 */
static int make_completely_empty(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    Error *local_err = NULL;
    int ret, l1_clusters;
    int64_t offset;
    uint64_t *new_reftable = NULL;
    uint64_t rt_entry, l1_size2;
    /* Written in one go at offsetof(QCowHeader, l1_table_offset);
     * NOTE(review): presumably mirrors three consecutive header fields (L1
     * table offset, refcount table offset, refcount table clusters) - confirm
     * against the QCowHeader definition */
    struct {
        uint64_t l1_offset;
        uint64_t reftable_offset;
        uint32_t reftable_clusters;
    } QEMU_PACKED l1_ofs_rt_ofs_cls;

    ret = qcow2_cache_empty(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    ret = qcow2_cache_empty(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* Refcounts will be broken utterly */
    ret = qcow2_mark_dirty(bs);
    if (ret < 0) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);

    /* Size of the L1 table in clusters and in bytes, respectively */
    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
    l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t);

    /* After this call, neither the in-memory nor the on-disk refcount
     * information accurately describe the actual references */

    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
                             l1_clusters * s->cluster_size, 0);
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    /* Keep the in-memory L1 table in sync with the zeroed on-disk one */
    memset(s->l1_table, 0, l1_size2);

    BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);

    /* Overwrite enough clusters at the beginning of the image file (right
     * after the header cluster) to place the refcount table, a refcount
     * block and the L1 table in; this may overwrite parts of the existing
     * refcount and L1 table, which is not an issue because the dirty flag is
     * set, complete data loss is in fact desired and partial data loss is
     * consequently fine as well */
    ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
                             (2 + l1_clusters) * s->cluster_size, 0);
    /* This call (even if it failed overall) may have overwritten on-disk
     * refcount structures; in that case, the in-memory refcount information
     * will probably differ from the on-disk information which makes the BDS
     * unusable */
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);

    /* "Create" an empty reftable (one cluster) directly after the image
     * header and an empty L1 table three clusters after the image header;
     * the cluster between those two will be used as the first refblock */
    l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
    l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }

    s->l1_table_offset = 3 * s->cluster_size;

    /* One cluster worth of zeroed reftable entries */
    new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
    if (!new_reftable) {
        ret = -ENOMEM;
        goto fail_broken_refcounts;
    }

    s->refcount_table_offset = s->cluster_size;
    s->refcount_table_size = s->cluster_size / sizeof(uint64_t);
    s->max_refcount_table_index = 0;

    /* Hand ownership of the new table to the state; clearing the local
     * pointer keeps the g_free() at "fail" from releasing it */
    g_free(s->refcount_table);
    s->refcount_table = new_reftable;
    new_reftable = NULL;

    /* Now the in-memory refcount information again corresponds to the on-disk
     * information (reftable is empty and no refblocks (the refblock cache is
     * empty)); however, this means some clusters (e.g. the image header) are
     * referenced, but not refcounted, but the normal qcow2 code assumes that
     * the in-memory information is always correct */

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

    /* Enter the first refblock into the reftable */
    rt_entry = cpu_to_be64(2 * s->cluster_size);
    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
                           &rt_entry, sizeof(rt_entry));
    if (ret < 0) {
        goto fail_broken_refcounts;
    }
    s->refcount_table[0] = 2 * s->cluster_size;

    /* Allocate header, reftable, refblock and L1 table in a single request
     * starting the search at cluster 0; the result is expected to be offset 0,
     * anything else means the first cluster was considered in use */
    s->free_cluster_index = 0;
    assert(3 + l1_clusters <= s->refcount_block_size);
    offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
    if (offset < 0) {
        ret = offset;
        goto fail_broken_refcounts;
    } else if (offset > 0) {
        error_report("First cluster in emptied image is in use");
        abort();
    }

    /* Now finally the in-memory information corresponds to the on-disk
     * structures and is correct */
    ret = qcow2_mark_clean(bs);
    if (ret < 0) {
        goto fail;
    }

    /* Cut the file off right behind the L1 table */
    ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size,
                        PREALLOC_MODE_OFF, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        goto fail;
    }

    return 0;

fail_broken_refcounts:
    /* The BDS is unusable at this point. If we wanted to make it usable, we
     * would have to call qcow2_refcount_close(), qcow2_refcount_init(),
     * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init()
     * again. However, because the functions which could have caused this error
     * path to be taken are used by those functions as well, it's very likely
     * that that sequence will fail as well. Therefore, just eject the BDS. */
    bs->drv = NULL;

fail:
    g_free(new_reftable);
    return ret;
}
Also, the 4375 * default action for this kind of discard is to pass the discard, 4376 * which will ideally result in an actually smaller image file, as 4377 * is probably desired. */ 4378 ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset), 4379 QCOW2_DISCARD_SNAPSHOT, true); 4380 if (ret < 0) { 4381 break; 4382 } 4383 } 4384 4385 return ret; 4386 } 4387 4388 static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) 4389 { 4390 BDRVQcow2State *s = bs->opaque; 4391 int ret; 4392 4393 qemu_co_mutex_lock(&s->lock); 4394 ret = qcow2_write_caches(bs); 4395 qemu_co_mutex_unlock(&s->lock); 4396 4397 return ret; 4398 } 4399 4400 static ssize_t qcow2_measure_crypto_hdr_init_func(QCryptoBlock *block, 4401 size_t headerlen, void *opaque, Error **errp) 4402 { 4403 size_t *headerlenp = opaque; 4404 4405 /* Stash away the payload size */ 4406 *headerlenp = headerlen; 4407 return 0; 4408 } 4409 4410 static ssize_t qcow2_measure_crypto_hdr_write_func(QCryptoBlock *block, 4411 size_t offset, const uint8_t *buf, size_t buflen, 4412 void *opaque, Error **errp) 4413 { 4414 /* Discard the bytes, we're not actually writing to an image */ 4415 return buflen; 4416 } 4417 4418 /* Determine the number of bytes for the LUKS payload */ 4419 static bool qcow2_measure_luks_headerlen(QemuOpts *opts, size_t *len, 4420 Error **errp) 4421 { 4422 QDict *opts_qdict; 4423 QDict *cryptoopts_qdict; 4424 QCryptoBlockCreateOptions *cryptoopts; 4425 QCryptoBlock *crypto; 4426 4427 /* Extract "encrypt." 
options into a qdict */ 4428 opts_qdict = qemu_opts_to_qdict(opts, NULL); 4429 qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt."); 4430 qobject_unref(opts_qdict); 4431 4432 /* Build QCryptoBlockCreateOptions object from qdict */ 4433 qdict_put_str(cryptoopts_qdict, "format", "luks"); 4434 cryptoopts = block_crypto_create_opts_init(cryptoopts_qdict, errp); 4435 qobject_unref(cryptoopts_qdict); 4436 if (!cryptoopts) { 4437 return false; 4438 } 4439 4440 /* Fake LUKS creation in order to determine the payload size */ 4441 crypto = qcrypto_block_create(cryptoopts, "encrypt.", 4442 qcow2_measure_crypto_hdr_init_func, 4443 qcow2_measure_crypto_hdr_write_func, 4444 len, errp); 4445 qapi_free_QCryptoBlockCreateOptions(cryptoopts); 4446 if (!crypto) { 4447 return false; 4448 } 4449 4450 qcrypto_block_free(crypto); 4451 return true; 4452 } 4453 4454 static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, 4455 Error **errp) 4456 { 4457 Error *local_err = NULL; 4458 BlockMeasureInfo *info; 4459 uint64_t required = 0; /* bytes that contribute to required size */ 4460 uint64_t virtual_size; /* disk size as seen by guest */ 4461 uint64_t refcount_bits; 4462 uint64_t l2_tables; 4463 uint64_t luks_payload_size = 0; 4464 size_t cluster_size; 4465 int version; 4466 char *optstr; 4467 PreallocMode prealloc; 4468 bool has_backing_file; 4469 bool has_luks; 4470 4471 /* Parse image creation options */ 4472 cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err); 4473 if (local_err) { 4474 goto err; 4475 } 4476 4477 version = qcow2_opt_get_version_del(opts, &local_err); 4478 if (local_err) { 4479 goto err; 4480 } 4481 4482 refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err); 4483 if (local_err) { 4484 goto err; 4485 } 4486 4487 optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 4488 prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr, 4489 PREALLOC_MODE_OFF, &local_err); 4490 g_free(optstr); 4491 if (local_err) { 
4492 goto err; 4493 } 4494 4495 optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); 4496 has_backing_file = !!optstr; 4497 g_free(optstr); 4498 4499 optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT); 4500 has_luks = optstr && strcmp(optstr, "luks") == 0; 4501 g_free(optstr); 4502 4503 if (has_luks) { 4504 size_t headerlen; 4505 4506 if (!qcow2_measure_luks_headerlen(opts, &headerlen, &local_err)) { 4507 goto err; 4508 } 4509 4510 luks_payload_size = ROUND_UP(headerlen, cluster_size); 4511 } 4512 4513 virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); 4514 virtual_size = ROUND_UP(virtual_size, cluster_size); 4515 4516 /* Check that virtual disk size is valid */ 4517 l2_tables = DIV_ROUND_UP(virtual_size / cluster_size, 4518 cluster_size / sizeof(uint64_t)); 4519 if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) { 4520 error_setg(&local_err, "The image size is too large " 4521 "(try using a larger cluster size)"); 4522 goto err; 4523 } 4524 4525 /* Account for input image */ 4526 if (in_bs) { 4527 int64_t ssize = bdrv_getlength(in_bs); 4528 if (ssize < 0) { 4529 error_setg_errno(&local_err, -ssize, 4530 "Unable to get image virtual_size"); 4531 goto err; 4532 } 4533 4534 virtual_size = ROUND_UP(ssize, cluster_size); 4535 4536 if (has_backing_file) { 4537 /* We don't how much of the backing chain is shared by the input 4538 * image and the new image file. In the worst case the new image's 4539 * backing file has nothing in common with the input image. Be 4540 * conservative and assume all clusters need to be written. 
4541 */ 4542 required = virtual_size; 4543 } else { 4544 int64_t offset; 4545 int64_t pnum = 0; 4546 4547 for (offset = 0; offset < ssize; offset += pnum) { 4548 int ret; 4549 4550 ret = bdrv_block_status_above(in_bs, NULL, offset, 4551 ssize - offset, &pnum, NULL, 4552 NULL); 4553 if (ret < 0) { 4554 error_setg_errno(&local_err, -ret, 4555 "Unable to get block status"); 4556 goto err; 4557 } 4558 4559 if (ret & BDRV_BLOCK_ZERO) { 4560 /* Skip zero regions (safe with no backing file) */ 4561 } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) == 4562 (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) { 4563 /* Extend pnum to end of cluster for next iteration */ 4564 pnum = ROUND_UP(offset + pnum, cluster_size) - offset; 4565 4566 /* Count clusters we've seen */ 4567 required += offset % cluster_size + pnum; 4568 } 4569 } 4570 } 4571 } 4572 4573 /* Take into account preallocation. Nothing special is needed for 4574 * PREALLOC_MODE_METADATA since metadata is always counted. 4575 */ 4576 if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { 4577 required = virtual_size; 4578 } 4579 4580 info = g_new(BlockMeasureInfo, 1); 4581 info->fully_allocated = 4582 qcow2_calc_prealloc_size(virtual_size, cluster_size, 4583 ctz32(refcount_bits)) + luks_payload_size; 4584 4585 /* Remove data clusters that are not required. This overestimates the 4586 * required size because metadata needed for the fully allocated file is 4587 * still counted. 
4588 */ 4589 info->required = info->fully_allocated - virtual_size + required; 4590 return info; 4591 4592 err: 4593 error_propagate(errp, local_err); 4594 return NULL; 4595 } 4596 4597 static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 4598 { 4599 BDRVQcow2State *s = bs->opaque; 4600 bdi->unallocated_blocks_are_zero = true; 4601 bdi->cluster_size = s->cluster_size; 4602 bdi->vm_state_offset = qcow2_vm_state_offset(s); 4603 return 0; 4604 } 4605 4606 static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs, 4607 Error **errp) 4608 { 4609 BDRVQcow2State *s = bs->opaque; 4610 ImageInfoSpecific *spec_info; 4611 QCryptoBlockInfo *encrypt_info = NULL; 4612 Error *local_err = NULL; 4613 4614 if (s->crypto != NULL) { 4615 encrypt_info = qcrypto_block_get_info(s->crypto, &local_err); 4616 if (local_err) { 4617 error_propagate(errp, local_err); 4618 return NULL; 4619 } 4620 } 4621 4622 spec_info = g_new(ImageInfoSpecific, 1); 4623 *spec_info = (ImageInfoSpecific){ 4624 .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, 4625 .u.qcow2.data = g_new0(ImageInfoSpecificQCow2, 1), 4626 }; 4627 if (s->qcow_version == 2) { 4628 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ 4629 .compat = g_strdup("0.10"), 4630 .refcount_bits = s->refcount_bits, 4631 }; 4632 } else if (s->qcow_version == 3) { 4633 Qcow2BitmapInfoList *bitmaps; 4634 bitmaps = qcow2_get_bitmap_info_list(bs, &local_err); 4635 if (local_err) { 4636 error_propagate(errp, local_err); 4637 qapi_free_ImageInfoSpecific(spec_info); 4638 return NULL; 4639 } 4640 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ 4641 .compat = g_strdup("1.1"), 4642 .lazy_refcounts = s->compatible_features & 4643 QCOW2_COMPAT_LAZY_REFCOUNTS, 4644 .has_lazy_refcounts = true, 4645 .corrupt = s->incompatible_features & 4646 QCOW2_INCOMPAT_CORRUPT, 4647 .has_corrupt = true, 4648 .refcount_bits = s->refcount_bits, 4649 .has_bitmaps = !!bitmaps, 4650 .bitmaps = bitmaps, 4651 .has_data_file = !!s->image_data_file, 4652 
.data_file = g_strdup(s->image_data_file), 4653 .has_data_file_raw = has_data_file(bs), 4654 .data_file_raw = data_file_is_raw(bs), 4655 }; 4656 } else { 4657 /* if this assertion fails, this probably means a new version was 4658 * added without having it covered here */ 4659 assert(false); 4660 } 4661 4662 if (encrypt_info) { 4663 ImageInfoSpecificQCow2Encryption *qencrypt = 4664 g_new(ImageInfoSpecificQCow2Encryption, 1); 4665 switch (encrypt_info->format) { 4666 case Q_CRYPTO_BLOCK_FORMAT_QCOW: 4667 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES; 4668 break; 4669 case Q_CRYPTO_BLOCK_FORMAT_LUKS: 4670 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS; 4671 qencrypt->u.luks = encrypt_info->u.luks; 4672 break; 4673 default: 4674 abort(); 4675 } 4676 /* Since we did shallow copy above, erase any pointers 4677 * in the original info */ 4678 memset(&encrypt_info->u, 0, sizeof(encrypt_info->u)); 4679 qapi_free_QCryptoBlockInfo(encrypt_info); 4680 4681 spec_info->u.qcow2.data->has_encrypt = true; 4682 spec_info->u.qcow2.data->encrypt = qencrypt; 4683 } 4684 4685 return spec_info; 4686 } 4687 4688 static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 4689 int64_t pos) 4690 { 4691 BDRVQcow2State *s = bs->opaque; 4692 4693 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); 4694 return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos, 4695 qiov->size, qiov, 0); 4696 } 4697 4698 static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 4699 int64_t pos) 4700 { 4701 BDRVQcow2State *s = bs->opaque; 4702 4703 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); 4704 return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos, 4705 qiov->size, qiov, 0); 4706 } 4707 4708 /* 4709 * Downgrades an image's version. To achieve this, any incompatible features 4710 * have to be removed. 
4711 */ 4712 static int qcow2_downgrade(BlockDriverState *bs, int target_version, 4713 BlockDriverAmendStatusCB *status_cb, void *cb_opaque, 4714 Error **errp) 4715 { 4716 BDRVQcow2State *s = bs->opaque; 4717 int current_version = s->qcow_version; 4718 int ret; 4719 4720 /* This is qcow2_downgrade(), not qcow2_upgrade() */ 4721 assert(target_version < current_version); 4722 4723 /* There are no other versions (now) that you can downgrade to */ 4724 assert(target_version == 2); 4725 4726 if (s->refcount_order != 4) { 4727 error_setg(errp, "compat=0.10 requires refcount_bits=16"); 4728 return -ENOTSUP; 4729 } 4730 4731 if (has_data_file(bs)) { 4732 error_setg(errp, "Cannot downgrade an image with a data file"); 4733 return -ENOTSUP; 4734 } 4735 4736 /* clear incompatible features */ 4737 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 4738 ret = qcow2_mark_clean(bs); 4739 if (ret < 0) { 4740 error_setg_errno(errp, -ret, "Failed to make the image clean"); 4741 return ret; 4742 } 4743 } 4744 4745 /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in 4746 * the first place; if that happens nonetheless, returning -ENOTSUP is the 4747 * best thing to do anyway */ 4748 4749 if (s->incompatible_features) { 4750 error_setg(errp, "Cannot downgrade an image with incompatible features " 4751 "%#" PRIx64 " set", s->incompatible_features); 4752 return -ENOTSUP; 4753 } 4754 4755 /* since we can ignore compatible features, we can set them to 0 as well */ 4756 s->compatible_features = 0; 4757 /* if lazy refcounts have been used, they have already been fixed through 4758 * clearing the dirty flag */ 4759 4760 /* clearing autoclear features is trivial */ 4761 s->autoclear_features = 0; 4762 4763 ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque); 4764 if (ret < 0) { 4765 error_setg_errno(errp, -ret, "Failed to turn zero into data clusters"); 4766 return ret; 4767 } 4768 4769 s->qcow_version = target_version; 4770 ret = qcow2_update_header(bs); 4771 
if (ret < 0) { 4772 s->qcow_version = current_version; 4773 error_setg_errno(errp, -ret, "Failed to update the image header"); 4774 return ret; 4775 } 4776 return 0; 4777 } 4778 4779 typedef enum Qcow2AmendOperation { 4780 /* This is the value Qcow2AmendHelperCBInfo::last_operation will be 4781 * statically initialized to so that the helper CB can discern the first 4782 * invocation from an operation change */ 4783 QCOW2_NO_OPERATION = 0, 4784 4785 QCOW2_CHANGING_REFCOUNT_ORDER, 4786 QCOW2_DOWNGRADING, 4787 } Qcow2AmendOperation; 4788 4789 typedef struct Qcow2AmendHelperCBInfo { 4790 /* The code coordinating the amend operations should only modify 4791 * these four fields; the rest will be managed by the CB */ 4792 BlockDriverAmendStatusCB *original_status_cb; 4793 void *original_cb_opaque; 4794 4795 Qcow2AmendOperation current_operation; 4796 4797 /* Total number of operations to perform (only set once) */ 4798 int total_operations; 4799 4800 /* The following fields are managed by the CB */ 4801 4802 /* Number of operations completed */ 4803 int operations_completed; 4804 4805 /* Cumulative offset of all completed operations */ 4806 int64_t offset_completed; 4807 4808 Qcow2AmendOperation last_operation; 4809 int64_t last_work_size; 4810 } Qcow2AmendHelperCBInfo; 4811 4812 static void qcow2_amend_helper_cb(BlockDriverState *bs, 4813 int64_t operation_offset, 4814 int64_t operation_work_size, void *opaque) 4815 { 4816 Qcow2AmendHelperCBInfo *info = opaque; 4817 int64_t current_work_size; 4818 int64_t projected_work_size; 4819 4820 if (info->current_operation != info->last_operation) { 4821 if (info->last_operation != QCOW2_NO_OPERATION) { 4822 info->offset_completed += info->last_work_size; 4823 info->operations_completed++; 4824 } 4825 4826 info->last_operation = info->current_operation; 4827 } 4828 4829 assert(info->total_operations > 0); 4830 assert(info->operations_completed < info->total_operations); 4831 4832 info->last_work_size = operation_work_size; 4833 
4834 current_work_size = info->offset_completed + operation_work_size; 4835 4836 /* current_work_size is the total work size for (operations_completed + 1) 4837 * operations (which includes this one), so multiply it by the number of 4838 * operations not covered and divide it by the number of operations 4839 * covered to get a projection for the operations not covered */ 4840 projected_work_size = current_work_size * (info->total_operations - 4841 info->operations_completed - 1) 4842 / (info->operations_completed + 1); 4843 4844 info->original_status_cb(bs, info->offset_completed + operation_offset, 4845 current_work_size + projected_work_size, 4846 info->original_cb_opaque); 4847 } 4848 4849 static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, 4850 BlockDriverAmendStatusCB *status_cb, 4851 void *cb_opaque, 4852 Error **errp) 4853 { 4854 BDRVQcow2State *s = bs->opaque; 4855 int old_version = s->qcow_version, new_version = old_version; 4856 uint64_t new_size = 0; 4857 const char *backing_file = NULL, *backing_format = NULL, *data_file = NULL; 4858 bool lazy_refcounts = s->use_lazy_refcounts; 4859 bool data_file_raw = data_file_is_raw(bs); 4860 const char *compat = NULL; 4861 uint64_t cluster_size = s->cluster_size; 4862 bool encrypt; 4863 int encformat; 4864 int refcount_bits = s->refcount_bits; 4865 int ret; 4866 QemuOptDesc *desc = opts->list->desc; 4867 Qcow2AmendHelperCBInfo helper_cb_info; 4868 4869 while (desc && desc->name) { 4870 if (!qemu_opt_find(opts, desc->name)) { 4871 /* only change explicitly defined options */ 4872 desc++; 4873 continue; 4874 } 4875 4876 if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) { 4877 compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL); 4878 if (!compat) { 4879 /* preserve default */ 4880 } else if (!strcmp(compat, "0.10")) { 4881 new_version = 2; 4882 } else if (!strcmp(compat, "1.1")) { 4883 new_version = 3; 4884 } else { 4885 error_setg(errp, "Unknown compatibility level %s", compat); 4886 return -EINVAL; 
4887 } 4888 } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) { 4889 error_setg(errp, "Cannot change preallocation mode"); 4890 return -ENOTSUP; 4891 } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { 4892 new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 4893 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) { 4894 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 4895 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) { 4896 backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 4897 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) { 4898 encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, 4899 !!s->crypto); 4900 4901 if (encrypt != !!s->crypto) { 4902 error_setg(errp, 4903 "Changing the encryption flag is not supported"); 4904 return -ENOTSUP; 4905 } 4906 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) { 4907 encformat = qcow2_crypt_method_from_format( 4908 qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT)); 4909 4910 if (encformat != s->crypt_method_header) { 4911 error_setg(errp, 4912 "Changing the encryption format is not supported"); 4913 return -ENOTSUP; 4914 } 4915 } else if (g_str_has_prefix(desc->name, "encrypt.")) { 4916 error_setg(errp, 4917 "Changing the encryption parameters is not supported"); 4918 return -ENOTSUP; 4919 } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { 4920 cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 4921 cluster_size); 4922 if (cluster_size != s->cluster_size) { 4923 error_setg(errp, "Changing the cluster size is not supported"); 4924 return -ENOTSUP; 4925 } 4926 } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { 4927 lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, 4928 lazy_refcounts); 4929 } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { 4930 refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS, 4931 refcount_bits); 4932 4933 if (refcount_bits <= 0 || refcount_bits > 64 || 4934 !is_power_of_2(refcount_bits)) 4935 { 
4936 error_setg(errp, "Refcount width must be a power of two and " 4937 "may not exceed 64 bits"); 4938 return -EINVAL; 4939 } 4940 } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE)) { 4941 data_file = qemu_opt_get(opts, BLOCK_OPT_DATA_FILE); 4942 if (data_file && !has_data_file(bs)) { 4943 error_setg(errp, "data-file can only be set for images that " 4944 "use an external data file"); 4945 return -EINVAL; 4946 } 4947 } else if (!strcmp(desc->name, BLOCK_OPT_DATA_FILE_RAW)) { 4948 data_file_raw = qemu_opt_get_bool(opts, BLOCK_OPT_DATA_FILE_RAW, 4949 data_file_raw); 4950 if (data_file_raw && !data_file_is_raw(bs)) { 4951 error_setg(errp, "data-file-raw cannot be set on existing " 4952 "images"); 4953 return -EINVAL; 4954 } 4955 } else { 4956 /* if this point is reached, this probably means a new option was 4957 * added without having it covered here */ 4958 abort(); 4959 } 4960 4961 desc++; 4962 } 4963 4964 helper_cb_info = (Qcow2AmendHelperCBInfo){ 4965 .original_status_cb = status_cb, 4966 .original_cb_opaque = cb_opaque, 4967 .total_operations = (new_version < old_version) 4968 + (s->refcount_bits != refcount_bits) 4969 }; 4970 4971 /* Upgrade first (some features may require compat=1.1) */ 4972 if (new_version > old_version) { 4973 s->qcow_version = new_version; 4974 ret = qcow2_update_header(bs); 4975 if (ret < 0) { 4976 s->qcow_version = old_version; 4977 error_setg_errno(errp, -ret, "Failed to update the image header"); 4978 return ret; 4979 } 4980 } 4981 4982 if (s->refcount_bits != refcount_bits) { 4983 int refcount_order = ctz32(refcount_bits); 4984 4985 if (new_version < 3 && refcount_bits != 16) { 4986 error_setg(errp, "Refcount widths other than 16 bits require " 4987 "compatibility level 1.1 or above (use compat=1.1 or " 4988 "greater)"); 4989 return -EINVAL; 4990 } 4991 4992 helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER; 4993 ret = qcow2_change_refcount_order(bs, refcount_order, 4994 &qcow2_amend_helper_cb, 4995 &helper_cb_info, 
errp); 4996 if (ret < 0) { 4997 return ret; 4998 } 4999 } 5000 5001 /* data-file-raw blocks backing files, so clear it first if requested */ 5002 if (data_file_raw) { 5003 s->autoclear_features |= QCOW2_AUTOCLEAR_DATA_FILE_RAW; 5004 } else { 5005 s->autoclear_features &= ~QCOW2_AUTOCLEAR_DATA_FILE_RAW; 5006 } 5007 5008 if (data_file) { 5009 g_free(s->image_data_file); 5010 s->image_data_file = *data_file ? g_strdup(data_file) : NULL; 5011 } 5012 5013 ret = qcow2_update_header(bs); 5014 if (ret < 0) { 5015 error_setg_errno(errp, -ret, "Failed to update the image header"); 5016 return ret; 5017 } 5018 5019 if (backing_file || backing_format) { 5020 ret = qcow2_change_backing_file(bs, 5021 backing_file ?: s->image_backing_file, 5022 backing_format ?: s->image_backing_format); 5023 if (ret < 0) { 5024 error_setg_errno(errp, -ret, "Failed to change the backing file"); 5025 return ret; 5026 } 5027 } 5028 5029 if (s->use_lazy_refcounts != lazy_refcounts) { 5030 if (lazy_refcounts) { 5031 if (new_version < 3) { 5032 error_setg(errp, "Lazy refcounts only supported with " 5033 "compatibility level 1.1 and above (use compat=1.1 " 5034 "or greater)"); 5035 return -EINVAL; 5036 } 5037 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 5038 ret = qcow2_update_header(bs); 5039 if (ret < 0) { 5040 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 5041 error_setg_errno(errp, -ret, "Failed to update the image header"); 5042 return ret; 5043 } 5044 s->use_lazy_refcounts = true; 5045 } else { 5046 /* make image clean first */ 5047 ret = qcow2_mark_clean(bs); 5048 if (ret < 0) { 5049 error_setg_errno(errp, -ret, "Failed to make the image clean"); 5050 return ret; 5051 } 5052 /* now disallow lazy refcounts */ 5053 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 5054 ret = qcow2_update_header(bs); 5055 if (ret < 0) { 5056 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 5057 error_setg_errno(errp, -ret, "Failed to update the image header"); 5058 return ret; 5059 } 
5060 s->use_lazy_refcounts = false; 5061 } 5062 } 5063 5064 if (new_size) { 5065 BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); 5066 ret = blk_insert_bs(blk, bs, errp); 5067 if (ret < 0) { 5068 blk_unref(blk); 5069 return ret; 5070 } 5071 5072 ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, errp); 5073 blk_unref(blk); 5074 if (ret < 0) { 5075 return ret; 5076 } 5077 } 5078 5079 /* Downgrade last (so unsupported features can be removed before) */ 5080 if (new_version < old_version) { 5081 helper_cb_info.current_operation = QCOW2_DOWNGRADING; 5082 ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb, 5083 &helper_cb_info, errp); 5084 if (ret < 0) { 5085 return ret; 5086 } 5087 } 5088 5089 return 0; 5090 } 5091 5092 /* 5093 * If offset or size are negative, respectively, they will not be included in 5094 * the BLOCK_IMAGE_CORRUPTED event emitted. 5095 * fatal will be ignored for read-only BDS; corruptions found there will always 5096 * be considered non-fatal. 5097 */ 5098 void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, 5099 int64_t size, const char *message_format, ...) 
5100 { 5101 BDRVQcow2State *s = bs->opaque; 5102 const char *node_name; 5103 char *message; 5104 va_list ap; 5105 5106 fatal = fatal && bdrv_is_writable(bs); 5107 5108 if (s->signaled_corruption && 5109 (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT))) 5110 { 5111 return; 5112 } 5113 5114 va_start(ap, message_format); 5115 message = g_strdup_vprintf(message_format, ap); 5116 va_end(ap); 5117 5118 if (fatal) { 5119 fprintf(stderr, "qcow2: Marking image as corrupt: %s; further " 5120 "corruption events will be suppressed\n", message); 5121 } else { 5122 fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal " 5123 "corruption events will be suppressed\n", message); 5124 } 5125 5126 node_name = bdrv_get_node_name(bs); 5127 qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), 5128 *node_name != '\0', node_name, 5129 message, offset >= 0, offset, 5130 size >= 0, size, 5131 fatal); 5132 g_free(message); 5133 5134 if (fatal) { 5135 qcow2_mark_corrupt(bs); 5136 bs->drv = NULL; /* make BDS unusable */ 5137 } 5138 5139 s->signaled_corruption = true; 5140 } 5141 5142 static QemuOptsList qcow2_create_opts = { 5143 .name = "qcow2-create-opts", 5144 .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head), 5145 .desc = { 5146 { 5147 .name = BLOCK_OPT_SIZE, 5148 .type = QEMU_OPT_SIZE, 5149 .help = "Virtual disk size" 5150 }, 5151 { 5152 .name = BLOCK_OPT_COMPAT_LEVEL, 5153 .type = QEMU_OPT_STRING, 5154 .help = "Compatibility level (0.10 or 1.1)" 5155 }, 5156 { 5157 .name = BLOCK_OPT_BACKING_FILE, 5158 .type = QEMU_OPT_STRING, 5159 .help = "File name of a base image" 5160 }, 5161 { 5162 .name = BLOCK_OPT_BACKING_FMT, 5163 .type = QEMU_OPT_STRING, 5164 .help = "Image format of the base image" 5165 }, 5166 { 5167 .name = BLOCK_OPT_DATA_FILE, 5168 .type = QEMU_OPT_STRING, 5169 .help = "File name of an external data file" 5170 }, 5171 { 5172 .name = BLOCK_OPT_DATA_FILE_RAW, 5173 .type = QEMU_OPT_BOOL, 5174 .help = "The external data file must stay 
valid as a raw image" 5175 }, 5176 { 5177 .name = BLOCK_OPT_ENCRYPT, 5178 .type = QEMU_OPT_BOOL, 5179 .help = "Encrypt the image with format 'aes'. (Deprecated " 5180 "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", 5181 }, 5182 { 5183 .name = BLOCK_OPT_ENCRYPT_FORMAT, 5184 .type = QEMU_OPT_STRING, 5185 .help = "Encrypt the image, format choices: 'aes', 'luks'", 5186 }, 5187 BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", 5188 "ID of secret providing qcow AES key or LUKS passphrase"), 5189 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."), 5190 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."), 5191 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."), 5192 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."), 5193 BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."), 5194 BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."), 5195 { 5196 .name = BLOCK_OPT_CLUSTER_SIZE, 5197 .type = QEMU_OPT_SIZE, 5198 .help = "qcow2 cluster size", 5199 .def_value_str = stringify(DEFAULT_CLUSTER_SIZE) 5200 }, 5201 { 5202 .name = BLOCK_OPT_PREALLOC, 5203 .type = QEMU_OPT_STRING, 5204 .help = "Preallocation mode (allowed values: off, metadata, " 5205 "falloc, full)" 5206 }, 5207 { 5208 .name = BLOCK_OPT_LAZY_REFCOUNTS, 5209 .type = QEMU_OPT_BOOL, 5210 .help = "Postpone refcount updates", 5211 .def_value_str = "off" 5212 }, 5213 { 5214 .name = BLOCK_OPT_REFCOUNT_BITS, 5215 .type = QEMU_OPT_NUMBER, 5216 .help = "Width of a reference count entry in bits", 5217 .def_value_str = "16" 5218 }, 5219 { /* end of list */ } 5220 } 5221 }; 5222 5223 static const char *const qcow2_strong_runtime_opts[] = { 5224 "encrypt." 
BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET, 5225 5226 NULL 5227 }; 5228 5229 BlockDriver bdrv_qcow2 = { 5230 .format_name = "qcow2", 5231 .instance_size = sizeof(BDRVQcow2State), 5232 .bdrv_probe = qcow2_probe, 5233 .bdrv_open = qcow2_open, 5234 .bdrv_close = qcow2_close, 5235 .bdrv_reopen_prepare = qcow2_reopen_prepare, 5236 .bdrv_reopen_commit = qcow2_reopen_commit, 5237 .bdrv_reopen_abort = qcow2_reopen_abort, 5238 .bdrv_join_options = qcow2_join_options, 5239 .bdrv_child_perm = bdrv_format_default_perms, 5240 .bdrv_co_create_opts = qcow2_co_create_opts, 5241 .bdrv_co_create = qcow2_co_create, 5242 .bdrv_has_zero_init = bdrv_has_zero_init_1, 5243 .bdrv_co_block_status = qcow2_co_block_status, 5244 5245 .bdrv_co_preadv = qcow2_co_preadv, 5246 .bdrv_co_pwritev = qcow2_co_pwritev, 5247 .bdrv_co_flush_to_os = qcow2_co_flush_to_os, 5248 5249 .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, 5250 .bdrv_co_pdiscard = qcow2_co_pdiscard, 5251 .bdrv_co_copy_range_from = qcow2_co_copy_range_from, 5252 .bdrv_co_copy_range_to = qcow2_co_copy_range_to, 5253 .bdrv_co_truncate = qcow2_co_truncate, 5254 .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed, 5255 .bdrv_make_empty = qcow2_make_empty, 5256 5257 .bdrv_snapshot_create = qcow2_snapshot_create, 5258 .bdrv_snapshot_goto = qcow2_snapshot_goto, 5259 .bdrv_snapshot_delete = qcow2_snapshot_delete, 5260 .bdrv_snapshot_list = qcow2_snapshot_list, 5261 .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, 5262 .bdrv_measure = qcow2_measure, 5263 .bdrv_get_info = qcow2_get_info, 5264 .bdrv_get_specific_info = qcow2_get_specific_info, 5265 5266 .bdrv_save_vmstate = qcow2_save_vmstate, 5267 .bdrv_load_vmstate = qcow2_load_vmstate, 5268 5269 .supports_backing = true, 5270 .bdrv_change_backing_file = qcow2_change_backing_file, 5271 5272 .bdrv_refresh_limits = qcow2_refresh_limits, 5273 .bdrv_co_invalidate_cache = qcow2_co_invalidate_cache, 5274 .bdrv_inactivate = qcow2_inactivate, 5275 5276 .create_opts = &qcow2_create_opts, 5277 
.strong_runtime_opts = qcow2_strong_runtime_opts, 5278 .bdrv_co_check = qcow2_co_check, 5279 .bdrv_amend_options = qcow2_amend_options, 5280 5281 .bdrv_detach_aio_context = qcow2_detach_aio_context, 5282 .bdrv_attach_aio_context = qcow2_attach_aio_context, 5283 5284 .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw, 5285 .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap, 5286 .bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap, 5287 }; 5288 5289 static void bdrv_qcow2_init(void) 5290 { 5291 bdrv_register(&bdrv_qcow2); 5292 } 5293 5294 block_init(bdrv_qcow2_init); 5295