1 /* 2 * Block driver for the QCOW version 2 format 3 * 4 * Copyright (c) 2004-2006 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "qemu/osdep.h" 26 27 #define ZLIB_CONST 28 #include <zlib.h> 29 30 #include "block/block_int.h" 31 #include "block/qdict.h" 32 #include "sysemu/block-backend.h" 33 #include "qemu/module.h" 34 #include "qcow2.h" 35 #include "qemu/error-report.h" 36 #include "qapi/error.h" 37 #include "qapi/qapi-events-block-core.h" 38 #include "qapi/qmp/qdict.h" 39 #include "qapi/qmp/qstring.h" 40 #include "trace.h" 41 #include "qemu/option_int.h" 42 #include "qemu/cutils.h" 43 #include "qemu/bswap.h" 44 #include "qapi/qobject-input-visitor.h" 45 #include "qapi/qapi-visit-block-core.h" 46 #include "crypto.h" 47 #include "block/thread-pool.h" 48 49 /* 50 Differences with QCOW: 51 52 - Support for multiple incremental snapshots. 53 - Memory management by reference counts. 54 - Clusters which have a reference count of one have the bit 55 QCOW_OFLAG_COPIED to optimize write performance. 56 - Size of compressed clusters is stored in sectors to reduce bit usage 57 in the cluster offsets. 58 - Support for storing additional data (such as the VM state) in the 59 snapshots. 60 - If a backing store is used, the cluster size is not constrained 61 (could be backported to QCOW). 62 - L2 tables have always a size of one cluster. 63 */ 64 65 66 typedef struct { 67 uint32_t magic; 68 uint32_t len; 69 } QEMU_PACKED QCowExtension; 70 71 #define QCOW2_EXT_MAGIC_END 0 72 #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA 73 #define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 74 #define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77 75 #define QCOW2_EXT_MAGIC_BITMAPS 0x23852875 76 77 static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) 78 { 79 const QCowHeader *cow_header = (const void *)buf; 80 81 if (buf_size >= sizeof(QCowHeader) && 82 be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 83 be32_to_cpu(cow_header->version) >= 2) 84 return 100; 85 else 86 return 0; 87 } 88 89 90 static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset, 91 uint8_t *buf, size_t buflen, 92 void *opaque, Error **errp) 93 { 94 BlockDriverState *bs = opaque; 95 BDRVQcow2State *s = bs->opaque; 96 ssize_t ret; 97 98 if ((offset + buflen) > s->crypto_header.length) { 99 error_setg(errp, "Request for data outside of extension header"); 100 return -1; 101 } 102 103 ret = bdrv_pread(bs->file, 104 s->crypto_header.offset + offset, buf, buflen); 105 if (ret < 0) { 106 error_setg_errno(errp, -ret, "Could not read encryption header"); 107 return -1; 108 } 109 return ret; 110 } 111 112 113 static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen, 114 void *opaque, Error **errp) 115 { 116 BlockDriverState *bs = opaque; 117 BDRVQcow2State *s = bs->opaque; 118 int64_t ret; 119 int64_t clusterlen; 120 121 ret = qcow2_alloc_clusters(bs, headerlen); 122 if (ret < 0) { 123 error_setg_errno(errp, -ret, 124 "Cannot allocate cluster for LUKS header size %zu", 125 headerlen); 126 return -1; 127 } 128 129 s->crypto_header.length = headerlen; 130 s->crypto_header.offset = ret; 131 132 /* Zero fill remaining space in cluster so it has predictable 133 * content in case of future spec changes */ 134 clusterlen = size_to_clusters(s, headerlen) * s->cluster_size; 135 assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen) == 0); 136 ret = bdrv_pwrite_zeroes(bs->file, 137 ret + headerlen, 138 clusterlen - headerlen, 0); 139 if (ret < 0) { 140 error_setg_errno(errp, -ret, "Could not zero fill encryption header"); 141 return -1; 142 } 143 144 return ret; 145 } 146 147 148 static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset, 149 const uint8_t *buf, size_t buflen, 150 void *opaque, Error **errp) 151 { 152 BlockDriverState *bs = opaque; 153 BDRVQcow2State *s = bs->opaque; 154 ssize_t ret; 155 156 if ((offset + buflen) > s->crypto_header.length) { 157 error_setg(errp, "Request for data outside of extension header"); 158 return -1; 159 } 160 161 ret = bdrv_pwrite(bs->file, 162 s->crypto_header.offset + offset, buf, buflen); 163 if (ret < 0) { 164 error_setg_errno(errp, -ret, "Could not read encryption header"); 165 return -1; 166 } 167 return ret; 168 } 169 170 171 /* 172 * read qcow2 extension and fill bs 173 * start reading from start_offset 174 * finish reading upon magic of value 0 or when end_offset reached 175 * unknown magic is skipped (future extension this version knows nothing about) 176 * return 0 upon success, non-0 otherwise 177 */ 178 static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, 179 uint64_t end_offset, void **p_feature_table, 180 int flags, bool *need_update_header, 181 Error **errp) 182 { 183 BDRVQcow2State *s = bs->opaque; 184 QCowExtension ext; 185 uint64_t offset; 186 int ret; 187 Qcow2BitmapHeaderExt bitmaps_ext; 188 189 if (need_update_header != NULL) { 190 *need_update_header = false; 191 } 192 193 #ifdef DEBUG_EXT 194 printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); 195 #endif 196 offset = start_offset; 197 while (offset < end_offset) { 198 199 #ifdef DEBUG_EXT 200 /* Sanity check */ 201 if (offset > s->cluster_size) 202 printf("qcow2_read_extension: suspicious offset %lu\n", offset); 203 204 printf("attempting to read extended header in offset %lu\n", offset); 205 #endif 206 207 ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext)); 208 if (ret < 0) { 209 error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: " 210 "pread fail from offset %" PRIu64, offset); 211 return 1; 212 } 213 be32_to_cpus(&ext.magic); 214 be32_to_cpus(&ext.len); 215 offset += sizeof(ext); 216 #ifdef DEBUG_EXT 217 printf("ext.magic = 0x%x\n", ext.magic); 218 #endif 219 if (offset > end_offset || ext.len > end_offset - offset) { 220 error_setg(errp, "Header extension too large"); 221 return -EINVAL; 222 } 223 224 switch (ext.magic) { 225 case QCOW2_EXT_MAGIC_END: 226 return 0; 227 228 case QCOW2_EXT_MAGIC_BACKING_FORMAT: 229 if (ext.len >= sizeof(bs->backing_format)) { 230 error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32 231 " too large (>=%zu)", ext.len, 232 sizeof(bs->backing_format)); 233 return 2; 234 } 235 ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len); 236 if (ret < 0) { 237 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: " 238 "Could not read format name"); 239 return 3; 240 } 241 bs->backing_format[ext.len] = '\0'; 242 s->image_backing_format = g_strdup(bs->backing_format); 243 #ifdef DEBUG_EXT 244 printf("Qcow2: Got format extension %s\n", bs->backing_format); 245 #endif 246 break; 247 248 case QCOW2_EXT_MAGIC_FEATURE_TABLE: 249 if (p_feature_table != NULL) { 250 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); 251 ret = bdrv_pread(bs->file, offset , feature_table, ext.len); 252 if (ret < 0) { 253 error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " 254 "Could not read table"); 255 return ret; 256 } 257 258 *p_feature_table = feature_table; 259 } 260 break; 261 262 case QCOW2_EXT_MAGIC_CRYPTO_HEADER: { 263 unsigned int cflags = 0; 264 if (s->crypt_method_header != QCOW_CRYPT_LUKS) { 265 error_setg(errp, "CRYPTO header extension only " 266 "expected with LUKS encryption method"); 267 return -EINVAL; 268 } 269 if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) { 270 error_setg(errp, "CRYPTO header extension size %u, " 271 "but expected size %zu", ext.len, 272 sizeof(Qcow2CryptoHeaderExtension)); 273 return -EINVAL; 274 } 275 276 ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len); 277 if (ret < 0) { 278 error_setg_errno(errp, -ret, 279 "Unable to read CRYPTO header extension"); 280 return ret; 281 } 282 be64_to_cpus(&s->crypto_header.offset); 283 be64_to_cpus(&s->crypto_header.length); 284 285 if ((s->crypto_header.offset % s->cluster_size) != 0) { 286 error_setg(errp, "Encryption header offset '%" PRIu64 "' is " 287 "not a multiple of cluster size '%u'", 288 s->crypto_header.offset, s->cluster_size); 289 return -EINVAL; 290 } 291 292 if (flags & BDRV_O_NO_IO) { 293 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; 294 } 295 s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", 296 qcow2_crypto_hdr_read_func, 297 bs, cflags, errp); 298 if (!s->crypto) { 299 return -EINVAL; 300 } 301 } break; 302 303 case QCOW2_EXT_MAGIC_BITMAPS: 304 if (ext.len != sizeof(bitmaps_ext)) { 305 error_setg_errno(errp, -ret, "bitmaps_ext: " 306 "Invalid extension length"); 307 return -EINVAL; 308 } 309 310 if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) { 311 if (s->qcow_version < 3) { 312 /* Let's be a bit more specific */ 313 warn_report("This qcow2 v2 image contains bitmaps, but " 314 "they may have been modified by a program " 315 "without persistent bitmap support; so now " 316 "they must all be considered inconsistent"); 317 } else { 318 warn_report("a program lacking bitmap support " 319 "modified this file, so all bitmaps are now " 320 "considered inconsistent"); 321 } 322 error_printf("Some clusters may be leaked, " 323 "run 'qemu-img check -r' on the image " 324 "file to fix."); 325 if (need_update_header != NULL) { 326 /* Updating is needed to drop invalid bitmap extension. */ 327 *need_update_header = true; 328 } 329 break; 330 } 331 332 ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len); 333 if (ret < 0) { 334 error_setg_errno(errp, -ret, "bitmaps_ext: " 335 "Could not read ext header"); 336 return ret; 337 } 338 339 if (bitmaps_ext.reserved32 != 0) { 340 error_setg_errno(errp, -ret, "bitmaps_ext: " 341 "Reserved field is not zero"); 342 return -EINVAL; 343 } 344 345 be32_to_cpus(&bitmaps_ext.nb_bitmaps); 346 be64_to_cpus(&bitmaps_ext.bitmap_directory_size); 347 be64_to_cpus(&bitmaps_ext.bitmap_directory_offset); 348 349 if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) { 350 error_setg(errp, 351 "bitmaps_ext: Image has %" PRIu32 " bitmaps, " 352 "exceeding the QEMU supported maximum of %d", 353 bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS); 354 return -EINVAL; 355 } 356 357 if (bitmaps_ext.nb_bitmaps == 0) { 358 error_setg(errp, "found bitmaps extension with zero bitmaps"); 359 return -EINVAL; 360 } 361 362 if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) { 363 error_setg(errp, "bitmaps_ext: " 364 "invalid bitmap directory offset"); 365 return -EINVAL; 366 } 367 368 if (bitmaps_ext.bitmap_directory_size > 369 QCOW2_MAX_BITMAP_DIRECTORY_SIZE) { 370 error_setg(errp, "bitmaps_ext: " 371 "bitmap directory size (%" PRIu64 ") exceeds " 372 "the maximum supported size (%d)", 373 bitmaps_ext.bitmap_directory_size, 374 QCOW2_MAX_BITMAP_DIRECTORY_SIZE); 375 return -EINVAL; 376 } 377 378 s->nb_bitmaps = bitmaps_ext.nb_bitmaps; 379 s->bitmap_directory_offset = 380 bitmaps_ext.bitmap_directory_offset; 381 s->bitmap_directory_size = 382 bitmaps_ext.bitmap_directory_size; 383 384 #ifdef DEBUG_EXT 385 printf("Qcow2: Got bitmaps extension: " 386 "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n", 387 s->bitmap_directory_offset, s->nb_bitmaps); 388 #endif 389 break; 390 391 default: 392 /* unknown magic - save it in case we need to rewrite the header */ 393 /* If you add a new feature, make sure to also update the fast 394 * path of qcow2_make_empty() to deal with it. */ 395 { 396 Qcow2UnknownHeaderExtension *uext; 397 398 uext = g_malloc0(sizeof(*uext) + ext.len); 399 uext->magic = ext.magic; 400 uext->len = ext.len; 401 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); 402 403 ret = bdrv_pread(bs->file, offset , uext->data, uext->len); 404 if (ret < 0) { 405 error_setg_errno(errp, -ret, "ERROR: unknown extension: " 406 "Could not read data"); 407 return ret; 408 } 409 } 410 break; 411 } 412 413 offset += ((ext.len + 7) & ~7); 414 } 415 416 return 0; 417 } 418 419 static void cleanup_unknown_header_ext(BlockDriverState *bs) 420 { 421 BDRVQcow2State *s = bs->opaque; 422 Qcow2UnknownHeaderExtension *uext, *next; 423 424 QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { 425 QLIST_REMOVE(uext, next); 426 g_free(uext); 427 } 428 } 429 430 static void report_unsupported_feature(Error **errp, Qcow2Feature *table, 431 uint64_t mask) 432 { 433 char *features = g_strdup(""); 434 char *old; 435 436 while (table && table->name[0] != '\0') { 437 if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) { 438 if (mask & (1ULL << table->bit)) { 439 old = features; 440 features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "", 441 table->name); 442 g_free(old); 443 mask &= ~(1ULL << table->bit); 444 } 445 } 446 table++; 447 } 448 449 if (mask) { 450 old = features; 451 features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64, 452 old, *old ? ", " : "", mask); 453 g_free(old); 454 } 455 456 error_setg(errp, "Unsupported qcow2 feature(s): %s", features); 457 g_free(features); 458 } 459 460 /* 461 * Sets the dirty bit and flushes afterwards if necessary. 462 * 463 * The incompatible_features bit is only set if the image file header was 464 * updated successfully. Therefore it is not required to check the return 465 * value of this function. 466 */ 467 int qcow2_mark_dirty(BlockDriverState *bs) 468 { 469 BDRVQcow2State *s = bs->opaque; 470 uint64_t val; 471 int ret; 472 473 assert(s->qcow_version >= 3); 474 475 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 476 return 0; /* already dirty */ 477 } 478 479 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY); 480 ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features), 481 &val, sizeof(val)); 482 if (ret < 0) { 483 return ret; 484 } 485 ret = bdrv_flush(bs->file->bs); 486 if (ret < 0) { 487 return ret; 488 } 489 490 /* Only treat image as dirty if the header was updated successfully */ 491 s->incompatible_features |= QCOW2_INCOMPAT_DIRTY; 492 return 0; 493 } 494 495 /* 496 * Clears the dirty bit and flushes before if necessary. Only call this 497 * function when there are no pending requests, it does not guard against 498 * concurrent requests dirtying the image. 499 */ 500 static int qcow2_mark_clean(BlockDriverState *bs) 501 { 502 BDRVQcow2State *s = bs->opaque; 503 504 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 505 int ret; 506 507 s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY; 508 509 ret = qcow2_flush_caches(bs); 510 if (ret < 0) { 511 return ret; 512 } 513 514 return qcow2_update_header(bs); 515 } 516 return 0; 517 } 518 519 /* 520 * Marks the image as corrupt. 521 */ 522 int qcow2_mark_corrupt(BlockDriverState *bs) 523 { 524 BDRVQcow2State *s = bs->opaque; 525 526 s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT; 527 return qcow2_update_header(bs); 528 } 529 530 /* 531 * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes 532 * before if necessary. 533 */ 534 int qcow2_mark_consistent(BlockDriverState *bs) 535 { 536 BDRVQcow2State *s = bs->opaque; 537 538 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 539 int ret = qcow2_flush_caches(bs); 540 if (ret < 0) { 541 return ret; 542 } 543 544 s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT; 545 return qcow2_update_header(bs); 546 } 547 return 0; 548 } 549 550 static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs, 551 BdrvCheckResult *result, 552 BdrvCheckMode fix) 553 { 554 int ret = qcow2_check_refcounts(bs, result, fix); 555 if (ret < 0) { 556 return ret; 557 } 558 559 if (fix && result->check_errors == 0 && result->corruptions == 0) { 560 ret = qcow2_mark_clean(bs); 561 if (ret < 0) { 562 return ret; 563 } 564 return qcow2_mark_consistent(bs); 565 } 566 return ret; 567 } 568 569 static int coroutine_fn qcow2_co_check(BlockDriverState *bs, 570 BdrvCheckResult *result, 571 BdrvCheckMode fix) 572 { 573 BDRVQcow2State *s = bs->opaque; 574 int ret; 575 576 qemu_co_mutex_lock(&s->lock); 577 ret = qcow2_co_check_locked(bs, result, fix); 578 qemu_co_mutex_unlock(&s->lock); 579 return ret; 580 } 581 582 int qcow2_validate_table(BlockDriverState *bs, uint64_t offset, 583 uint64_t entries, size_t entry_len, 584 int64_t max_size_bytes, const char *table_name, 585 Error **errp) 586 { 587 BDRVQcow2State *s = bs->opaque; 588 589 if (entries > max_size_bytes / entry_len) { 590 error_setg(errp, "%s too large", table_name); 591 return -EFBIG; 592 } 593 594 /* Use signed INT64_MAX as the maximum even for uint64_t header fields, 595 * because values will be passed to qemu functions taking int64_t. */ 596 if ((INT64_MAX - entries * entry_len < offset) || 597 (offset_into_cluster(s, offset) != 0)) { 598 error_setg(errp, "%s offset invalid", table_name); 599 return -EINVAL; 600 } 601 602 return 0; 603 } 604 605 static QemuOptsList qcow2_runtime_opts = { 606 .name = "qcow2", 607 .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head), 608 .desc = { 609 { 610 .name = QCOW2_OPT_LAZY_REFCOUNTS, 611 .type = QEMU_OPT_BOOL, 612 .help = "Postpone refcount updates", 613 }, 614 { 615 .name = QCOW2_OPT_DISCARD_REQUEST, 616 .type = QEMU_OPT_BOOL, 617 .help = "Pass guest discard requests to the layer below", 618 }, 619 { 620 .name = QCOW2_OPT_DISCARD_SNAPSHOT, 621 .type = QEMU_OPT_BOOL, 622 .help = "Generate discard requests when snapshot related space " 623 "is freed", 624 }, 625 { 626 .name = QCOW2_OPT_DISCARD_OTHER, 627 .type = QEMU_OPT_BOOL, 628 .help = "Generate discard requests when other clusters are freed", 629 }, 630 { 631 .name = QCOW2_OPT_OVERLAP, 632 .type = QEMU_OPT_STRING, 633 .help = "Selects which overlap checks to perform from a range of " 634 "templates (none, constant, cached, all)", 635 }, 636 { 637 .name = QCOW2_OPT_OVERLAP_TEMPLATE, 638 .type = QEMU_OPT_STRING, 639 .help = "Selects which overlap checks to perform from a range of " 640 "templates (none, constant, cached, all)", 641 }, 642 { 643 .name = QCOW2_OPT_OVERLAP_MAIN_HEADER, 644 .type = QEMU_OPT_BOOL, 645 .help = "Check for unintended writes into the main qcow2 header", 646 }, 647 { 648 .name = QCOW2_OPT_OVERLAP_ACTIVE_L1, 649 .type = QEMU_OPT_BOOL, 650 .help = "Check for unintended writes into the active L1 table", 651 }, 652 { 653 .name = QCOW2_OPT_OVERLAP_ACTIVE_L2, 654 .type = QEMU_OPT_BOOL, 655 .help = "Check for unintended writes into an active L2 table", 656 }, 657 { 658 .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 659 .type = QEMU_OPT_BOOL, 660 .help = "Check for unintended writes into the refcount table", 661 }, 662 { 663 .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, 664 .type = QEMU_OPT_BOOL, 665 .help = "Check for unintended writes into a refcount block", 666 }, 667 { 668 .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, 669 .type = QEMU_OPT_BOOL, 670 .help = "Check for unintended writes into the snapshot table", 671 }, 672 { 673 .name = QCOW2_OPT_OVERLAP_INACTIVE_L1, 674 .type = QEMU_OPT_BOOL, 675 .help = "Check for unintended writes into an inactive L1 table", 676 }, 677 { 678 .name = QCOW2_OPT_OVERLAP_INACTIVE_L2, 679 .type = QEMU_OPT_BOOL, 680 .help = "Check for unintended writes into an inactive L2 table", 681 }, 682 { 683 .name = QCOW2_OPT_CACHE_SIZE, 684 .type = QEMU_OPT_SIZE, 685 .help = "Maximum combined metadata (L2 tables and refcount blocks) " 686 "cache size", 687 }, 688 { 689 .name = QCOW2_OPT_L2_CACHE_SIZE, 690 .type = QEMU_OPT_SIZE, 691 .help = "Maximum L2 table cache size", 692 }, 693 { 694 .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE, 695 .type = QEMU_OPT_SIZE, 696 .help = "Size of each entry in the L2 cache", 697 }, 698 { 699 .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE, 700 .type = QEMU_OPT_SIZE, 701 .help = "Maximum refcount block cache size", 702 }, 703 { 704 .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL, 705 .type = QEMU_OPT_NUMBER, 706 .help = "Clean unused cache entries after this time (in seconds)", 707 }, 708 BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", 709 "ID of secret providing qcow2 AES key or LUKS passphrase"), 710 { /* end of list */ } 711 }, 712 }; 713 714 static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { 715 [QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER, 716 [QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1, 717 [QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2, 718 [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 719 [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, 720 [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, 721 [QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1, 722 [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, 723 }; 724 725 static void cache_clean_timer_cb(void *opaque) 726 { 727 BlockDriverState *bs = opaque; 728 BDRVQcow2State *s = bs->opaque; 729 qcow2_cache_clean_unused(s->l2_table_cache); 730 qcow2_cache_clean_unused(s->refcount_block_cache); 731 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 732 (int64_t) s->cache_clean_interval * 1000); 733 } 734 735 static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) 736 { 737 BDRVQcow2State *s = bs->opaque; 738 if (s->cache_clean_interval > 0) { 739 s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL, 740 SCALE_MS, cache_clean_timer_cb, 741 bs); 742 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 743 (int64_t) s->cache_clean_interval * 1000); 744 } 745 } 746 747 static void cache_clean_timer_del(BlockDriverState *bs) 748 { 749 BDRVQcow2State *s = bs->opaque; 750 if (s->cache_clean_timer) { 751 timer_del(s->cache_clean_timer); 752 timer_free(s->cache_clean_timer); 753 s->cache_clean_timer = NULL; 754 } 755 } 756 757 static void qcow2_detach_aio_context(BlockDriverState *bs) 758 { 759 cache_clean_timer_del(bs); 760 } 761 762 static void qcow2_attach_aio_context(BlockDriverState *bs, 763 AioContext *new_context) 764 { 765 cache_clean_timer_init(bs, new_context); 766 } 767 768 static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, 769 uint64_t *l2_cache_size, 770 uint64_t *l2_cache_entry_size, 771 uint64_t *refcount_cache_size, Error **errp) 772 { 773 BDRVQcow2State *s = bs->opaque; 774 uint64_t combined_cache_size; 775 bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; 776 int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; 777 778 combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); 779 l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE); 780 refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 781 782 combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0); 783 *l2_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 0); 784 *refcount_cache_size = qemu_opt_get_size(opts, 785 QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0); 786 787 *l2_cache_entry_size = qemu_opt_get_size( 788 opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size); 789 790 if (combined_cache_size_set) { 791 if (l2_cache_size_set && refcount_cache_size_set) { 792 error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE 793 " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set " 794 "the same time"); 795 return; 796 } else if (*l2_cache_size > combined_cache_size) { 797 error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed " 798 QCOW2_OPT_CACHE_SIZE); 799 return; 800 } else if (*refcount_cache_size > combined_cache_size) { 801 error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed " 802 QCOW2_OPT_CACHE_SIZE); 803 return; 804 } 805 806 if (l2_cache_size_set) { 807 *refcount_cache_size = combined_cache_size - *l2_cache_size; 808 } else if (refcount_cache_size_set) { 809 *l2_cache_size = combined_cache_size - *refcount_cache_size; 810 } else { 811 uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; 812 uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); 813 814 /* Assign as much memory as possible to the L2 cache, and 815 * use the remainder for the refcount cache */ 816 if (combined_cache_size >= max_l2_cache + min_refcount_cache) { 817 *l2_cache_size = max_l2_cache; 818 *refcount_cache_size = combined_cache_size - *l2_cache_size; 819 } else { 820 *refcount_cache_size = 821 MIN(combined_cache_size, min_refcount_cache); 822 *l2_cache_size = combined_cache_size - *refcount_cache_size; 823 } 824 } 825 } else { 826 if (!l2_cache_size_set) { 827 *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE, 828 (uint64_t)DEFAULT_L2_CACHE_CLUSTERS 829 * s->cluster_size); 830 } 831 if (!refcount_cache_size_set) { 832 *refcount_cache_size = min_refcount_cache; 833 } 834 } 835 836 if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) || 837 *l2_cache_entry_size > s->cluster_size || 838 !is_power_of_2(*l2_cache_entry_size)) { 839 error_setg(errp, "L2 cache entry size must be a power of two " 840 "between %d and the cluster size (%d)", 841 1 << MIN_CLUSTER_BITS, s->cluster_size); 842 return; 843 } 844 } 845 846 typedef struct Qcow2ReopenState { 847 Qcow2Cache *l2_table_cache; 848 Qcow2Cache *refcount_block_cache; 849 int l2_slice_size; /* Number of entries in a slice of the L2 table */ 850 bool use_lazy_refcounts; 851 int overlap_check; 852 bool discard_passthrough[QCOW2_DISCARD_MAX]; 853 uint64_t cache_clean_interval; 854 QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */ 855 } Qcow2ReopenState; 856 857 static int qcow2_update_options_prepare(BlockDriverState *bs, 858 Qcow2ReopenState *r, 859 QDict *options, int flags, 860 Error **errp) 861 { 862 BDRVQcow2State *s = bs->opaque; 863 QemuOpts *opts = NULL; 864 const char *opt_overlap_check, *opt_overlap_check_template; 865 int overlap_check_template = 0; 866 uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size; 867 int i; 868 const char *encryptfmt; 869 QDict *encryptopts = NULL; 870 Error *local_err = NULL; 871 int ret; 872 873 qdict_extract_subqdict(options, &encryptopts, "encrypt."); 874 encryptfmt = qdict_get_try_str(encryptopts, "format"); 875 876 opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort); 877 qemu_opts_absorb_qdict(opts, options, &local_err); 878 if (local_err) { 879 error_propagate(errp, local_err); 880 ret = -EINVAL; 881 goto fail; 882 } 883 884 /* get L2 table/refcount block cache size from command line options */ 885 read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size, 886 &refcount_cache_size, &local_err); 887 if (local_err) { 888 error_propagate(errp, local_err); 889 ret = -EINVAL; 890 goto fail; 891 } 892 893 l2_cache_size /= l2_cache_entry_size; 894 if (l2_cache_size < MIN_L2_CACHE_SIZE) { 895 l2_cache_size = MIN_L2_CACHE_SIZE; 896 } 897 if (l2_cache_size > INT_MAX) { 898 error_setg(errp, "L2 cache size too big"); 899 ret = -EINVAL; 900 goto fail; 901 } 902 903 refcount_cache_size /= s->cluster_size; 904 if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) { 905 refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE; 906 } 907 if (refcount_cache_size > INT_MAX) { 908 error_setg(errp, "Refcount cache size too big"); 909 ret = -EINVAL; 910 goto fail; 911 } 912 913 /* alloc new L2 table/refcount block cache, flush old one */ 914 if (s->l2_table_cache) { 915 ret = qcow2_cache_flush(bs, s->l2_table_cache); 916 if (ret) { 917 error_setg_errno(errp, -ret, "Failed to flush the L2 table cache"); 918 goto fail; 919 } 920 } 921 922 if (s->refcount_block_cache) { 923 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 924 if (ret) { 925 error_setg_errno(errp, -ret, 926 "Failed to flush the refcount block cache"); 927 goto fail; 928 } 929 } 930 931 r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t); 932 r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size, 933 l2_cache_entry_size); 934 r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size, 935 s->cluster_size); 936 if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) { 937 error_setg(errp, "Could not allocate metadata caches"); 938 ret = -ENOMEM; 939 goto fail; 940 } 941 942 /* New interval for cache cleanup timer */ 943 r->cache_clean_interval = 944 qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 945 s->cache_clean_interval); 946 #ifndef CONFIG_LINUX 947 if (r->cache_clean_interval != 0) { 948 error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL 949 " not supported on this host"); 950 ret = -EINVAL; 951 goto fail; 952 } 953 #endif 954 if (r->cache_clean_interval > UINT_MAX) { 955 error_setg(errp, "Cache clean interval too big"); 956 ret = -EINVAL; 957 goto fail; 958 } 959 960 /* lazy-refcounts; flush if going from enabled to disabled */ 961 r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS, 962 (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)); 963 if (r->use_lazy_refcounts && s->qcow_version < 3) { 964 error_setg(errp, "Lazy refcounts require a qcow2 image with at least " 965 "qemu 1.1 compatibility level"); 966 ret = -EINVAL; 967 goto fail; 968 } 969 970 if (s->use_lazy_refcounts && !r->use_lazy_refcounts) { 971 ret = qcow2_mark_clean(bs); 972 if (ret < 0) { 973 error_setg_errno(errp, -ret, "Failed to disable lazy refcounts"); 974 goto fail; 975 } 976 } 977 978 /* Overlap check options */ 979 opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP); 980 opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE); 981 if (opt_overlap_check_template && opt_overlap_check && 982 strcmp(opt_overlap_check_template, opt_overlap_check)) 983 { 984 error_setg(errp, "Conflicting values for qcow2 options '" 985 QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE 986 "' ('%s')", opt_overlap_check, opt_overlap_check_template); 987 ret = -EINVAL; 988 goto fail; 989 } 990 if (!opt_overlap_check) { 991 opt_overlap_check = opt_overlap_check_template ?: "cached"; 992 } 993 994 if (!strcmp(opt_overlap_check, "none")) { 995 overlap_check_template = 0; 996 } else if (!strcmp(opt_overlap_check, "constant")) { 997 overlap_check_template = QCOW2_OL_CONSTANT; 998 } else if (!strcmp(opt_overlap_check, "cached")) { 999 overlap_check_template = QCOW2_OL_CACHED; 1000 } else if (!strcmp(opt_overlap_check, "all")) { 1001 overlap_check_template = QCOW2_OL_ALL; 1002 } else { 1003 error_setg(errp, "Unsupported value '%s' for qcow2 option " 1004 "'overlap-check'. Allowed are any of the following: " 1005 "none, constant, cached, all", opt_overlap_check); 1006 ret = -EINVAL; 1007 goto fail; 1008 } 1009 1010 r->overlap_check = 0; 1011 for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) { 1012 /* overlap-check defines a template bitmask, but every flag may be 1013 * overwritten through the associated boolean option */ 1014 r->overlap_check |= 1015 qemu_opt_get_bool(opts, overlap_bool_option_names[i], 1016 overlap_check_template & (1 << i)) << i; 1017 } 1018 1019 r->discard_passthrough[QCOW2_DISCARD_NEVER] = false; 1020 r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true; 1021 r->discard_passthrough[QCOW2_DISCARD_REQUEST] = 1022 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST, 1023 flags & BDRV_O_UNMAP); 1024 r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] = 1025 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true); 1026 r->discard_passthrough[QCOW2_DISCARD_OTHER] = 1027 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); 1028 1029 switch (s->crypt_method_header) { 1030 case QCOW_CRYPT_NONE: 1031 if (encryptfmt) { 1032 error_setg(errp, "No encryption in image header, but options " 1033 "specified format '%s'", encryptfmt); 1034 ret = -EINVAL; 1035 goto fail; 1036 } 1037 break; 1038 1039 case QCOW_CRYPT_AES: 1040 if (encryptfmt && !g_str_equal(encryptfmt, "aes")) { 1041 error_setg(errp, 1042 "Header reported 'aes' encryption format but " 1043 "options specify '%s'", encryptfmt); 1044 ret = -EINVAL; 1045 goto fail; 1046 } 1047 qdict_put_str(encryptopts, "format", "qcow"); 1048 r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); 1049 break; 1050 1051 case QCOW_CRYPT_LUKS: 1052 if (encryptfmt && !g_str_equal(encryptfmt, "luks")) { 1053 error_setg(errp, 1054 "Header reported 'luks' encryption format but " 1055 "options specify '%s'", encryptfmt); 1056 ret = -EINVAL; 1057 goto fail; 1058 } 1059 qdict_put_str(encryptopts, "format", "luks"); 1060 r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp); 1061 break; 1062 1063 default: 1064 error_setg(errp, "Unsupported encryption method %d", 1065 s->crypt_method_header); 1066 break; 1067 } 1068 if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) { 1069 ret = -EINVAL; 1070 goto fail; 1071 } 1072 1073 ret = 0; 1074 fail: 1075 qobject_unref(encryptopts); 1076 qemu_opts_del(opts); 1077 opts = NULL; 1078 return ret; 1079 } 1080 1081 static void qcow2_update_options_commit(BlockDriverState *bs, 1082 Qcow2ReopenState *r) 1083 { 1084 BDRVQcow2State *s = bs->opaque; 1085 int i; 1086 1087 if (s->l2_table_cache) { 1088 qcow2_cache_destroy(s->l2_table_cache); 1089 } 1090 if (s->refcount_block_cache) { 1091 qcow2_cache_destroy(s->refcount_block_cache); 1092 } 1093 s->l2_table_cache = r->l2_table_cache; 1094 s->refcount_block_cache = r->refcount_block_cache; 1095 s->l2_slice_size = r->l2_slice_size; 1096 1097 s->overlap_check = r->overlap_check; 1098 s->use_lazy_refcounts = r->use_lazy_refcounts; 1099 1100 for (i = 0; i < QCOW2_DISCARD_MAX; i++) { 1101 s->discard_passthrough[i] = r->discard_passthrough[i]; 1102 } 1103 1104 if (s->cache_clean_interval != r->cache_clean_interval) { 1105 cache_clean_timer_del(bs); 1106 s->cache_clean_interval = r->cache_clean_interval; 1107 cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); 1108 } 1109 1110 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); 1111 s->crypto_opts = r->crypto_opts; 1112 } 1113 1114 static void qcow2_update_options_abort(BlockDriverState *bs, 1115 Qcow2ReopenState *r) 1116 { 1117 if (r->l2_table_cache) { 1118 qcow2_cache_destroy(r->l2_table_cache); 1119 } 1120 if (r->refcount_block_cache) { 1121 qcow2_cache_destroy(r->refcount_block_cache); 1122 } 1123 qapi_free_QCryptoBlockOpenOptions(r->crypto_opts); 1124 } 1125 1126 static int qcow2_update_options(BlockDriverState *bs, QDict *options, 1127 int flags, Error **errp) 1128 { 1129 Qcow2ReopenState r = {}; 1130 int ret; 1131 1132 ret = qcow2_update_options_prepare(bs, &r, options, flags, errp); 1133 if (ret >= 0) { 1134 qcow2_update_options_commit(bs, &r); 1135 } else { 1136 qcow2_update_options_abort(bs, &r); 1137 } 1138 1139 return ret; 1140 } 1141 1142 /* Called with s->lock held. */ 1143 static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, 1144 int flags, Error **errp) 1145 { 1146 BDRVQcow2State *s = bs->opaque; 1147 unsigned int len, i; 1148 int ret = 0; 1149 QCowHeader header; 1150 Error *local_err = NULL; 1151 uint64_t ext_end; 1152 uint64_t l1_vm_state_index; 1153 bool update_header = false; 1154 bool header_updated = false; 1155 1156 ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); 1157 if (ret < 0) { 1158 error_setg_errno(errp, -ret, "Could not read qcow2 header"); 1159 goto fail; 1160 } 1161 be32_to_cpus(&header.magic); 1162 be32_to_cpus(&header.version); 1163 be64_to_cpus(&header.backing_file_offset); 1164 be32_to_cpus(&header.backing_file_size); 1165 be64_to_cpus(&header.size); 1166 be32_to_cpus(&header.cluster_bits); 1167 be32_to_cpus(&header.crypt_method); 1168 be64_to_cpus(&header.l1_table_offset); 1169 be32_to_cpus(&header.l1_size); 1170 be64_to_cpus(&header.refcount_table_offset); 1171 be32_to_cpus(&header.refcount_table_clusters); 1172 be64_to_cpus(&header.snapshots_offset); 1173 be32_to_cpus(&header.nb_snapshots); 1174 1175 if (header.magic != QCOW_MAGIC) { 1176 error_setg(errp, "Image is not in qcow2 format"); 1177 ret = -EINVAL; 1178 goto fail; 1179 } 1180 if (header.version < 2 || header.version > 3) { 1181 error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version); 1182 ret = -ENOTSUP; 1183 goto fail; 1184 } 1185 1186 s->qcow_version = header.version; 1187 1188 /* Initialise cluster size */ 1189 if (header.cluster_bits < MIN_CLUSTER_BITS || 1190 header.cluster_bits > MAX_CLUSTER_BITS) { 1191 error_setg(errp, "Unsupported cluster size: 2^%" PRIu32, 1192 header.cluster_bits); 1193 ret = -EINVAL; 1194 goto fail; 1195 } 1196 1197 s->cluster_bits = header.cluster_bits; 1198 s->cluster_size = 1 << s->cluster_bits; 1199 s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS); 1200 1201 /* Initialise version 3 header fields */ 1202 if (header.version == 2) { 1203 header.incompatible_features = 0; 1204 header.compatible_features = 0; 1205 header.autoclear_features = 0; 1206 header.refcount_order = 4; 1207 header.header_length = 72; 1208 } else { 1209 be64_to_cpus(&header.incompatible_features); 1210 be64_to_cpus(&header.compatible_features); 1211 be64_to_cpus(&header.autoclear_features); 1212 be32_to_cpus(&header.refcount_order); 1213 be32_to_cpus(&header.header_length); 1214 1215 if (header.header_length < 104) { 1216 error_setg(errp, "qcow2 header too short"); 1217 ret = -EINVAL; 1218 goto fail; 1219 } 1220 } 1221 1222 if (header.header_length > s->cluster_size) { 1223 error_setg(errp, "qcow2 header exceeds cluster size"); 1224 ret = -EINVAL; 1225 goto fail; 1226 } 1227 1228 if (header.header_length > sizeof(header)) { 1229 s->unknown_header_fields_size = header.header_length - sizeof(header); 1230 s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); 1231 ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields, 1232 s->unknown_header_fields_size); 1233 if (ret < 0) { 1234 error_setg_errno(errp, -ret, "Could not read unknown qcow2 header " 1235 "fields"); 1236 goto fail; 1237 } 1238 } 1239 1240 if (header.backing_file_offset > s->cluster_size) { 1241 error_setg(errp, "Invalid backing file offset"); 1242 ret = -EINVAL; 1243 goto fail; 1244 } 1245 1246 if (header.backing_file_offset) { 1247 ext_end = header.backing_file_offset; 1248 } else { 1249 ext_end = 1 << header.cluster_bits; 1250 } 1251 1252 /* Handle feature bits */ 1253 s->incompatible_features = header.incompatible_features; 1254 s->compatible_features = header.compatible_features; 1255 s->autoclear_features = header.autoclear_features; 1256 1257 if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) { 1258 void *feature_table = NULL; 1259 qcow2_read_extensions(bs, header.header_length, ext_end, 1260 &feature_table, flags, NULL, NULL); 1261 report_unsupported_feature(errp, feature_table, 1262 s->incompatible_features & 1263 ~QCOW2_INCOMPAT_MASK); 1264 ret = -ENOTSUP; 1265 g_free(feature_table); 1266 goto fail; 1267 } 1268 1269 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 1270 /* Corrupt images may not be written to unless they are being repaired 1271 */ 1272 if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { 1273 error_setg(errp, "qcow2: Image is corrupt; cannot be opened " 1274 "read/write"); 1275 ret = -EACCES; 1276 goto fail; 1277 } 1278 } 1279 1280 /* Check support for various header values */ 1281 if (header.refcount_order > 6) { 1282 error_setg(errp, "Reference count entry width too large; may not " 1283 "exceed 64 bits"); 1284 ret = -EINVAL; 1285 goto fail; 1286 } 1287 s->refcount_order = header.refcount_order; 1288 s->refcount_bits = 1 << s->refcount_order; 1289 s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); 1290 s->refcount_max += s->refcount_max - 1; 1291 1292 s->crypt_method_header = header.crypt_method; 1293 if (s->crypt_method_header) { 1294 if (bdrv_uses_whitelist() && 1295 s->crypt_method_header == QCOW_CRYPT_AES) { 1296 error_setg(errp, 1297 "Use of AES-CBC encrypted qcow2 images is no longer " 1298 "supported in system emulators"); 1299 error_append_hint(errp, 1300 "You can use 'qemu-img convert' to convert your " 1301 "image to an alternative supported format, such " 1302 "as unencrypted qcow2, or raw with the LUKS " 1303 "format instead.\n"); 1304 ret = -ENOSYS; 1305 goto fail; 1306 } 1307 1308 if (s->crypt_method_header == QCOW_CRYPT_AES) { 1309 s->crypt_physical_offset = false; 1310 } else { 1311 /* Assuming LUKS and any future crypt methods we 1312 * add will all use physical offsets, due to the 1313 * fact that the alternative is insecure... */ 1314 s->crypt_physical_offset = true; 1315 } 1316 1317 bs->encrypted = true; 1318 } 1319 1320 s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ 1321 s->l2_size = 1 << s->l2_bits; 1322 /* 2^(s->refcount_order - 3) is the refcount width in bytes */ 1323 s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3); 1324 s->refcount_block_size = 1 << s->refcount_block_bits; 1325 bs->total_sectors = header.size / 512; 1326 s->csize_shift = (62 - (s->cluster_bits - 8)); 1327 s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; 1328 s->cluster_offset_mask = (1LL << s->csize_shift) - 1; 1329 1330 s->refcount_table_offset = header.refcount_table_offset; 1331 s->refcount_table_size = 1332 header.refcount_table_clusters << (s->cluster_bits - 3); 1333 1334 if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) { 1335 error_setg(errp, "Image does not contain a reference count table"); 1336 ret = -EINVAL; 1337 goto fail; 1338 } 1339 1340 ret = qcow2_validate_table(bs, s->refcount_table_offset, 1341 header.refcount_table_clusters, 1342 s->cluster_size, QCOW_MAX_REFTABLE_SIZE, 1343 "Reference count table", errp); 1344 if (ret < 0) { 1345 goto fail; 1346 } 1347 1348 /* The total size in bytes of the snapshot table is checked in 1349 * qcow2_read_snapshots() because the size of each snapshot is 1350 * variable and we don't know it yet. 1351 * Here we only check the offset and number of snapshots. */ 1352 ret = qcow2_validate_table(bs, header.snapshots_offset, 1353 header.nb_snapshots, 1354 sizeof(QCowSnapshotHeader), 1355 sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS, 1356 "Snapshot table", errp); 1357 if (ret < 0) { 1358 goto fail; 1359 } 1360 1361 /* read the level 1 table */ 1362 ret = qcow2_validate_table(bs, header.l1_table_offset, 1363 header.l1_size, sizeof(uint64_t), 1364 QCOW_MAX_L1_SIZE, "Active L1 table", errp); 1365 if (ret < 0) { 1366 goto fail; 1367 } 1368 s->l1_size = header.l1_size; 1369 s->l1_table_offset = header.l1_table_offset; 1370 1371 l1_vm_state_index = size_to_l1(s, header.size); 1372 if (l1_vm_state_index > INT_MAX) { 1373 error_setg(errp, "Image is too big"); 1374 ret = -EFBIG; 1375 goto fail; 1376 } 1377 s->l1_vm_state_index = l1_vm_state_index; 1378 1379 /* the L1 table must contain at least enough entries to put 1380 header.size bytes */ 1381 if (s->l1_size < s->l1_vm_state_index) { 1382 error_setg(errp, "L1 table is too small"); 1383 ret = -EINVAL; 1384 goto fail; 1385 } 1386 1387 if (s->l1_size > 0) { 1388 s->l1_table = qemu_try_blockalign(bs->file->bs, 1389 ROUND_UP(s->l1_size * sizeof(uint64_t), 512)); 1390 if (s->l1_table == NULL) { 1391 error_setg(errp, "Could not allocate L1 table"); 1392 ret = -ENOMEM; 1393 goto fail; 1394 } 1395 ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, 1396 s->l1_size * sizeof(uint64_t)); 1397 if (ret < 0) { 1398 error_setg_errno(errp, -ret, "Could not read L1 table"); 1399 goto fail; 1400 } 1401 for(i = 0;i < s->l1_size; i++) { 1402 be64_to_cpus(&s->l1_table[i]); 1403 } 1404 } 1405 1406 /* Parse driver-specific options */ 1407 ret = qcow2_update_options(bs, options, flags, errp); 1408 if (ret < 0) { 1409 goto fail; 1410 } 1411 1412 s->cluster_cache_offset = -1; 1413 s->flags = flags; 1414 1415 ret = qcow2_refcount_init(bs); 1416 if (ret != 0) { 1417 error_setg_errno(errp, -ret, "Could not initialize refcount handling"); 1418 goto fail; 1419 } 1420 1421 QLIST_INIT(&s->cluster_allocs); 1422 QTAILQ_INIT(&s->discards); 1423 1424 /* read qcow2 extensions */ 1425 if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL, 1426 flags, &update_header, &local_err)) { 1427 error_propagate(errp, local_err); 1428 ret = -EINVAL; 1429 goto fail; 1430 } 1431 1432 /* qcow2_read_extension may have set up the crypto context 1433 * if the crypt method needs a header region, some methods 1434 * don't need header extensions, so must check here 1435 */ 1436 if (s->crypt_method_header && !s->crypto) { 1437 if (s->crypt_method_header == QCOW_CRYPT_AES) { 1438 unsigned int cflags = 0; 1439 if (flags & BDRV_O_NO_IO) { 1440 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; 1441 } 1442 s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.", 1443 NULL, NULL, cflags, errp); 1444 if (!s->crypto) { 1445 ret = -EINVAL; 1446 goto fail; 1447 } 1448 } else if (!(flags & BDRV_O_NO_IO)) { 1449 error_setg(errp, "Missing CRYPTO header for crypt method %d", 1450 s->crypt_method_header); 1451 ret = -EINVAL; 1452 goto fail; 1453 } 1454 } 1455 1456 /* read the backing file name */ 1457 if (header.backing_file_offset != 0) { 1458 len = header.backing_file_size; 1459 if (len > MIN(1023, s->cluster_size - header.backing_file_offset) || 1460 len >= sizeof(bs->backing_file)) { 1461 error_setg(errp, "Backing file name too long"); 1462 ret = -EINVAL; 1463 goto fail; 1464 } 1465 ret = bdrv_pread(bs->file, header.backing_file_offset, 1466 bs->backing_file, len); 1467 if (ret < 0) { 1468 error_setg_errno(errp, -ret, "Could not read backing file name"); 1469 goto fail; 1470 } 1471 bs->backing_file[len] = '\0'; 1472 s->image_backing_file = g_strdup(bs->backing_file); 1473 } 1474 1475 /* Internal snapshots */ 1476 s->snapshots_offset = header.snapshots_offset; 1477 s->nb_snapshots = header.nb_snapshots; 1478 1479 ret = qcow2_read_snapshots(bs); 1480 if (ret < 0) { 1481 error_setg_errno(errp, -ret, "Could not read snapshots"); 1482 goto fail; 1483 } 1484 1485 /* Clear unknown autoclear feature bits */ 1486 update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK; 1487 update_header = 1488 update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE); 1489 if (update_header) { 1490 s->autoclear_features &= QCOW2_AUTOCLEAR_MASK; 1491 } 1492 1493 if (s->dirty_bitmaps_loaded) { 1494 /* It's some kind of reopen. There are no known cases where we need to 1495 * reload bitmaps in such a situation, so it's safer to skip them. 1496 * 1497 * Moreover, if we have some readonly bitmaps and we are reopening for 1498 * rw we should reopen bitmaps correspondingly. 1499 */ 1500 if (bdrv_has_readonly_bitmaps(bs) && 1501 !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) 1502 { 1503 qcow2_reopen_bitmaps_rw_hint(bs, &header_updated, &local_err); 1504 } 1505 } else { 1506 header_updated = qcow2_load_dirty_bitmaps(bs, &local_err); 1507 s->dirty_bitmaps_loaded = true; 1508 } 1509 update_header = update_header && !header_updated; 1510 if (local_err != NULL) { 1511 error_propagate(errp, local_err); 1512 ret = -EINVAL; 1513 goto fail; 1514 } 1515 1516 if (update_header) { 1517 ret = qcow2_update_header(bs); 1518 if (ret < 0) { 1519 error_setg_errno(errp, -ret, "Could not update qcow2 header"); 1520 goto fail; 1521 } 1522 } 1523 1524 bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP : 0; 1525 1526 /* Repair image if dirty */ 1527 if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && 1528 (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { 1529 BdrvCheckResult result = {0}; 1530 1531 ret = qcow2_co_check_locked(bs, &result, 1532 BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); 1533 if (ret < 0 || result.check_errors) { 1534 if (ret >= 0) { 1535 ret = -EIO; 1536 } 1537 error_setg_errno(errp, -ret, "Could not repair dirty image"); 1538 goto fail; 1539 } 1540 } 1541 1542 #ifdef DEBUG_ALLOC 1543 { 1544 BdrvCheckResult result = {0}; 1545 qcow2_check_refcounts(bs, &result, 0); 1546 } 1547 #endif 1548 1549 qemu_co_queue_init(&s->compress_wait_queue); 1550 1551 return ret; 1552 1553 fail: 1554 g_free(s->unknown_header_fields); 1555 cleanup_unknown_header_ext(bs); 1556 qcow2_free_snapshots(bs); 1557 qcow2_refcount_close(bs); 1558 qemu_vfree(s->l1_table); 1559 /* else pre-write overlap checks in cache_destroy may crash */ 1560 s->l1_table = NULL; 1561 cache_clean_timer_del(bs); 1562 if (s->l2_table_cache) { 1563 qcow2_cache_destroy(s->l2_table_cache); 1564 } 1565 if (s->refcount_block_cache) { 1566 qcow2_cache_destroy(s->refcount_block_cache); 1567 } 1568 qcrypto_block_free(s->crypto); 1569 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); 1570 return ret; 1571 } 1572 1573 typedef struct QCow2OpenCo { 1574 BlockDriverState *bs; 1575 QDict *options; 1576 int flags; 1577 Error **errp; 1578 int ret; 1579 } QCow2OpenCo; 1580 1581 static void coroutine_fn qcow2_open_entry(void *opaque) 1582 { 1583 QCow2OpenCo *qoc = opaque; 1584 BDRVQcow2State *s = qoc->bs->opaque; 1585 1586 qemu_co_mutex_lock(&s->lock); 1587 qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp); 1588 qemu_co_mutex_unlock(&s->lock); 1589 } 1590 1591 static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, 1592 Error **errp) 1593 { 1594 BDRVQcow2State *s = bs->opaque; 1595 QCow2OpenCo qoc = { 1596 .bs = bs, 1597 .options = options, 1598 .flags = flags, 1599 .errp = errp, 1600 .ret = -EINPROGRESS 1601 }; 1602 1603 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 1604 false, errp); 1605 if (!bs->file) { 1606 return -EINVAL; 1607 } 1608 1609 /* Initialise locks */ 1610 qemu_co_mutex_init(&s->lock); 1611 1612 if (qemu_in_coroutine()) { 1613 /* From bdrv_co_create. */ 1614 qcow2_open_entry(&qoc); 1615 } else { 1616 qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc)); 1617 BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS); 1618 } 1619 return qoc.ret; 1620 } 1621 1622 static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) 1623 { 1624 BDRVQcow2State *s = bs->opaque; 1625 1626 if (bs->encrypted) { 1627 /* Encryption works on a sector granularity */ 1628 bs->bl.request_alignment = BDRV_SECTOR_SIZE; 1629 } 1630 bs->bl.pwrite_zeroes_alignment = s->cluster_size; 1631 bs->bl.pdiscard_alignment = s->cluster_size; 1632 } 1633 1634 static int qcow2_reopen_prepare(BDRVReopenState *state, 1635 BlockReopenQueue *queue, Error **errp) 1636 { 1637 Qcow2ReopenState *r; 1638 int ret; 1639 1640 r = g_new0(Qcow2ReopenState, 1); 1641 state->opaque = r; 1642 1643 ret = qcow2_update_options_prepare(state->bs, r, state->options, 1644 state->flags, errp); 1645 if (ret < 0) { 1646 goto fail; 1647 } 1648 1649 /* We need to write out any unwritten data if we reopen read-only. */ 1650 if ((state->flags & BDRV_O_RDWR) == 0) { 1651 ret = qcow2_reopen_bitmaps_ro(state->bs, errp); 1652 if (ret < 0) { 1653 goto fail; 1654 } 1655 1656 ret = bdrv_flush(state->bs); 1657 if (ret < 0) { 1658 goto fail; 1659 } 1660 1661 ret = qcow2_mark_clean(state->bs); 1662 if (ret < 0) { 1663 goto fail; 1664 } 1665 } 1666 1667 return 0; 1668 1669 fail: 1670 qcow2_update_options_abort(state->bs, r); 1671 g_free(r); 1672 return ret; 1673 } 1674 1675 static void qcow2_reopen_commit(BDRVReopenState *state) 1676 { 1677 qcow2_update_options_commit(state->bs, state->opaque); 1678 g_free(state->opaque); 1679 } 1680 1681 static void qcow2_reopen_abort(BDRVReopenState *state) 1682 { 1683 qcow2_update_options_abort(state->bs, state->opaque); 1684 g_free(state->opaque); 1685 } 1686 1687 static void qcow2_join_options(QDict *options, QDict *old_options) 1688 { 1689 bool has_new_overlap_template = 1690 qdict_haskey(options, QCOW2_OPT_OVERLAP) || 1691 qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE); 1692 bool has_new_total_cache_size = 1693 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE); 1694 bool has_all_cache_options; 1695 1696 /* New overlap template overrides all old overlap options */ 1697 if (has_new_overlap_template) { 1698 qdict_del(old_options, QCOW2_OPT_OVERLAP); 1699 qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE); 1700 qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER); 1701 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1); 1702 qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2); 1703 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE); 1704 qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK); 1705 qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE); 1706 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1); 1707 qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2); 1708 } 1709 1710 /* New total cache size overrides all old options */ 1711 if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) { 1712 qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE); 1713 qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 1714 } 1715 1716 qdict_join(options, old_options, false); 1717 1718 /* 1719 * If after merging all cache size options are set, an old total size is 1720 * overwritten. Do keep all options, however, if all three are new. The 1721 * resulting error message is what we want to happen. 1722 */ 1723 has_all_cache_options = 1724 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) || 1725 qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) || 1726 qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 1727 1728 if (has_all_cache_options && !has_new_total_cache_size) { 1729 qdict_del(options, QCOW2_OPT_CACHE_SIZE); 1730 } 1731 } 1732 1733 static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, 1734 bool want_zero, 1735 int64_t offset, int64_t count, 1736 int64_t *pnum, int64_t *map, 1737 BlockDriverState **file) 1738 { 1739 BDRVQcow2State *s = bs->opaque; 1740 uint64_t cluster_offset; 1741 int index_in_cluster, ret; 1742 unsigned int bytes; 1743 int status = 0; 1744 1745 bytes = MIN(INT_MAX, count); 1746 qemu_co_mutex_lock(&s->lock); 1747 ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); 1748 qemu_co_mutex_unlock(&s->lock); 1749 if (ret < 0) { 1750 return ret; 1751 } 1752 1753 *pnum = bytes; 1754 1755 if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED && 1756 !s->crypto) { 1757 index_in_cluster = offset & (s->cluster_size - 1); 1758 *map = cluster_offset | index_in_cluster; 1759 *file = bs->file->bs; 1760 status |= BDRV_BLOCK_OFFSET_VALID; 1761 } 1762 if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) { 1763 status |= BDRV_BLOCK_ZERO; 1764 } else if (ret != QCOW2_CLUSTER_UNALLOCATED) { 1765 status |= BDRV_BLOCK_DATA; 1766 } 1767 return status; 1768 } 1769 1770 static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, 1771 QCowL2Meta **pl2meta, 1772 bool link_l2) 1773 { 1774 int ret = 0; 1775 QCowL2Meta *l2meta = *pl2meta; 1776 1777 while (l2meta != NULL) { 1778 QCowL2Meta *next; 1779 1780 if (link_l2) { 1781 ret = qcow2_alloc_cluster_link_l2(bs, l2meta); 1782 if (ret) { 1783 goto out; 1784 } 1785 } else { 1786 qcow2_alloc_cluster_abort(bs, l2meta); 1787 } 1788 1789 /* Take the request off the list of running requests */ 1790 if (l2meta->nb_clusters != 0) { 1791 QLIST_REMOVE(l2meta, next_in_flight); 1792 } 1793 1794 qemu_co_queue_restart_all(&l2meta->dependent_requests); 1795 1796 next = l2meta->next; 1797 g_free(l2meta); 1798 l2meta = next; 1799 } 1800 out: 1801 *pl2meta = l2meta; 1802 return ret; 1803 } 1804 1805 static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, 1806 uint64_t bytes, QEMUIOVector *qiov, 1807 int flags) 1808 { 1809 BDRVQcow2State *s = bs->opaque; 1810 int offset_in_cluster; 1811 int ret; 1812 unsigned int cur_bytes; /* number of bytes in current iteration */ 1813 uint64_t cluster_offset = 0; 1814 uint64_t bytes_done = 0; 1815 QEMUIOVector hd_qiov; 1816 uint8_t *cluster_data = NULL; 1817 1818 qemu_iovec_init(&hd_qiov, qiov->niov); 1819 1820 qemu_co_mutex_lock(&s->lock); 1821 1822 while (bytes != 0) { 1823 1824 /* prepare next request */ 1825 cur_bytes = MIN(bytes, INT_MAX); 1826 if (s->crypto) { 1827 cur_bytes = MIN(cur_bytes, 1828 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 1829 } 1830 1831 ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset); 1832 if (ret < 0) { 1833 goto fail; 1834 } 1835 1836 offset_in_cluster = offset_into_cluster(s, offset); 1837 1838 qemu_iovec_reset(&hd_qiov); 1839 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); 1840 1841 switch (ret) { 1842 case QCOW2_CLUSTER_UNALLOCATED: 1843 1844 if (bs->backing) { 1845 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); 1846 qemu_co_mutex_unlock(&s->lock); 1847 ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, 1848 &hd_qiov, 0); 1849 qemu_co_mutex_lock(&s->lock); 1850 if (ret < 0) { 1851 goto fail; 1852 } 1853 } else { 1854 /* Note: in this case, no need to wait */ 1855 qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); 1856 } 1857 break; 1858 1859 case QCOW2_CLUSTER_ZERO_PLAIN: 1860 case QCOW2_CLUSTER_ZERO_ALLOC: 1861 qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); 1862 break; 1863 1864 case QCOW2_CLUSTER_COMPRESSED: 1865 /* add AIO support for compressed blocks ? */ 1866 ret = qcow2_decompress_cluster(bs, cluster_offset); 1867 if (ret < 0) { 1868 goto fail; 1869 } 1870 1871 qemu_iovec_from_buf(&hd_qiov, 0, 1872 s->cluster_cache + offset_in_cluster, 1873 cur_bytes); 1874 break; 1875 1876 case QCOW2_CLUSTER_NORMAL: 1877 if ((cluster_offset & 511) != 0) { 1878 ret = -EIO; 1879 goto fail; 1880 } 1881 1882 if (bs->encrypted) { 1883 assert(s->crypto); 1884 1885 /* 1886 * For encrypted images, read everything into a temporary 1887 * contiguous buffer on which the AES functions can work. 1888 */ 1889 if (!cluster_data) { 1890 cluster_data = 1891 qemu_try_blockalign(bs->file->bs, 1892 QCOW_MAX_CRYPT_CLUSTERS 1893 * s->cluster_size); 1894 if (cluster_data == NULL) { 1895 ret = -ENOMEM; 1896 goto fail; 1897 } 1898 } 1899 1900 assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 1901 qemu_iovec_reset(&hd_qiov); 1902 qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); 1903 } 1904 1905 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); 1906 qemu_co_mutex_unlock(&s->lock); 1907 ret = bdrv_co_preadv(bs->file, 1908 cluster_offset + offset_in_cluster, 1909 cur_bytes, &hd_qiov, 0); 1910 qemu_co_mutex_lock(&s->lock); 1911 if (ret < 0) { 1912 goto fail; 1913 } 1914 if (bs->encrypted) { 1915 assert(s->crypto); 1916 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); 1917 assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0); 1918 if (qcrypto_block_decrypt(s->crypto, 1919 (s->crypt_physical_offset ? 1920 cluster_offset + offset_in_cluster : 1921 offset), 1922 cluster_data, 1923 cur_bytes, 1924 NULL) < 0) { 1925 ret = -EIO; 1926 goto fail; 1927 } 1928 qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes); 1929 } 1930 break; 1931 1932 default: 1933 g_assert_not_reached(); 1934 ret = -EIO; 1935 goto fail; 1936 } 1937 1938 bytes -= cur_bytes; 1939 offset += cur_bytes; 1940 bytes_done += cur_bytes; 1941 } 1942 ret = 0; 1943 1944 fail: 1945 qemu_co_mutex_unlock(&s->lock); 1946 1947 qemu_iovec_destroy(&hd_qiov); 1948 qemu_vfree(cluster_data); 1949 1950 return ret; 1951 } 1952 1953 /* Check if it's possible to merge a write request with the writing of 1954 * the data from the COW regions */ 1955 static bool merge_cow(uint64_t offset, unsigned bytes, 1956 QEMUIOVector *hd_qiov, QCowL2Meta *l2meta) 1957 { 1958 QCowL2Meta *m; 1959 1960 for (m = l2meta; m != NULL; m = m->next) { 1961 /* If both COW regions are empty then there's nothing to merge */ 1962 if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) { 1963 continue; 1964 } 1965 1966 /* The data (middle) region must be immediately after the 1967 * start region */ 1968 if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) { 1969 continue; 1970 } 1971 1972 /* The end region must be immediately after the data (middle) 1973 * region */ 1974 if (m->offset + m->cow_end.offset != offset + bytes) { 1975 continue; 1976 } 1977 1978 /* Make sure that adding both COW regions to the QEMUIOVector 1979 * does not exceed IOV_MAX */ 1980 if (hd_qiov->niov > IOV_MAX - 2) { 1981 continue; 1982 } 1983 1984 m->data_qiov = hd_qiov; 1985 return true; 1986 } 1987 1988 return false; 1989 } 1990 1991 static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, 1992 uint64_t bytes, QEMUIOVector *qiov, 1993 int flags) 1994 { 1995 BDRVQcow2State *s = bs->opaque; 1996 int offset_in_cluster; 1997 int ret; 1998 unsigned int cur_bytes; /* number of sectors in current iteration */ 1999 uint64_t cluster_offset; 2000 QEMUIOVector hd_qiov; 2001 uint64_t bytes_done = 0; 2002 uint8_t *cluster_data = NULL; 2003 QCowL2Meta *l2meta = NULL; 2004 2005 trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes); 2006 2007 qemu_iovec_init(&hd_qiov, qiov->niov); 2008 2009 s->cluster_cache_offset = -1; /* disable compressed cache */ 2010 2011 qemu_co_mutex_lock(&s->lock); 2012 2013 while (bytes != 0) { 2014 2015 l2meta = NULL; 2016 2017 trace_qcow2_writev_start_part(qemu_coroutine_self()); 2018 offset_in_cluster = offset_into_cluster(s, offset); 2019 cur_bytes = MIN(bytes, INT_MAX); 2020 if (bs->encrypted) { 2021 cur_bytes = MIN(cur_bytes, 2022 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size 2023 - offset_in_cluster); 2024 } 2025 2026 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, 2027 &cluster_offset, &l2meta); 2028 if (ret < 0) { 2029 goto fail; 2030 } 2031 2032 assert((cluster_offset & 511) == 0); 2033 2034 qemu_iovec_reset(&hd_qiov); 2035 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); 2036 2037 if (bs->encrypted) { 2038 assert(s->crypto); 2039 if (!cluster_data) { 2040 cluster_data = qemu_try_blockalign(bs->file->bs, 2041 QCOW_MAX_CRYPT_CLUSTERS 2042 * s->cluster_size); 2043 if (cluster_data == NULL) { 2044 ret = -ENOMEM; 2045 goto fail; 2046 } 2047 } 2048 2049 assert(hd_qiov.size <= 2050 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 2051 qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); 2052 2053 if (qcrypto_block_encrypt(s->crypto, 2054 (s->crypt_physical_offset ? 2055 cluster_offset + offset_in_cluster : 2056 offset), 2057 cluster_data, 2058 cur_bytes, NULL) < 0) { 2059 ret = -EIO; 2060 goto fail; 2061 } 2062 2063 qemu_iovec_reset(&hd_qiov); 2064 qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); 2065 } 2066 2067 ret = qcow2_pre_write_overlap_check(bs, 0, 2068 cluster_offset + offset_in_cluster, cur_bytes); 2069 if (ret < 0) { 2070 goto fail; 2071 } 2072 2073 /* If we need to do COW, check if it's possible to merge the 2074 * writing of the guest data together with that of the COW regions. 2075 * If it's not possible (or not necessary) then write the 2076 * guest data now. */ 2077 if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) { 2078 qemu_co_mutex_unlock(&s->lock); 2079 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); 2080 trace_qcow2_writev_data(qemu_coroutine_self(), 2081 cluster_offset + offset_in_cluster); 2082 ret = bdrv_co_pwritev(bs->file, 2083 cluster_offset + offset_in_cluster, 2084 cur_bytes, &hd_qiov, 0); 2085 qemu_co_mutex_lock(&s->lock); 2086 if (ret < 0) { 2087 goto fail; 2088 } 2089 } 2090 2091 ret = qcow2_handle_l2meta(bs, &l2meta, true); 2092 if (ret) { 2093 goto fail; 2094 } 2095 2096 bytes -= cur_bytes; 2097 offset += cur_bytes; 2098 bytes_done += cur_bytes; 2099 trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes); 2100 } 2101 ret = 0; 2102 2103 fail: 2104 qcow2_handle_l2meta(bs, &l2meta, false); 2105 2106 qemu_co_mutex_unlock(&s->lock); 2107 2108 qemu_iovec_destroy(&hd_qiov); 2109 qemu_vfree(cluster_data); 2110 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); 2111 2112 return ret; 2113 } 2114 2115 static int qcow2_inactivate(BlockDriverState *bs) 2116 { 2117 BDRVQcow2State *s = bs->opaque; 2118 int ret, result = 0; 2119 Error *local_err = NULL; 2120 2121 qcow2_store_persistent_dirty_bitmaps(bs, &local_err); 2122 if (local_err != NULL) { 2123 result = -EINVAL; 2124 error_report_err(local_err); 2125 error_report("Persistent bitmaps are lost for node '%s'", 2126 bdrv_get_device_or_node_name(bs)); 2127 } 2128 2129 ret = qcow2_cache_flush(bs, s->l2_table_cache); 2130 if (ret) { 2131 result = ret; 2132 error_report("Failed to flush the L2 table cache: %s", 2133 strerror(-ret)); 2134 } 2135 2136 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 2137 if (ret) { 2138 result = ret; 2139 error_report("Failed to flush the refcount block cache: %s", 2140 strerror(-ret)); 2141 } 2142 2143 if (result == 0) { 2144 qcow2_mark_clean(bs); 2145 } 2146 2147 return result; 2148 } 2149 2150 static void qcow2_close(BlockDriverState *bs) 2151 { 2152 BDRVQcow2State *s = bs->opaque; 2153 qemu_vfree(s->l1_table); 2154 /* else pre-write overlap checks in cache_destroy may crash */ 2155 s->l1_table = NULL; 2156 2157 if (!(s->flags & BDRV_O_INACTIVE)) { 2158 qcow2_inactivate(bs); 2159 } 2160 2161 cache_clean_timer_del(bs); 2162 qcow2_cache_destroy(s->l2_table_cache); 2163 qcow2_cache_destroy(s->refcount_block_cache); 2164 2165 qcrypto_block_free(s->crypto); 2166 s->crypto = NULL; 2167 2168 g_free(s->unknown_header_fields); 2169 cleanup_unknown_header_ext(bs); 2170 2171 g_free(s->image_backing_file); 2172 g_free(s->image_backing_format); 2173 2174 g_free(s->cluster_cache); 2175 qemu_vfree(s->cluster_data); 2176 qcow2_refcount_close(bs); 2177 qcow2_free_snapshots(bs); 2178 } 2179 2180 static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs, 2181 Error **errp) 2182 { 2183 BDRVQcow2State *s = bs->opaque; 2184 int flags = s->flags; 2185 QCryptoBlock *crypto = NULL; 2186 QDict *options; 2187 Error *local_err = NULL; 2188 int ret; 2189 2190 /* 2191 * Backing files are read-only which makes all of their metadata immutable, 2192 * that means we don't have to worry about reopening them here. 2193 */ 2194 2195 crypto = s->crypto; 2196 s->crypto = NULL; 2197 2198 qcow2_close(bs); 2199 2200 memset(s, 0, sizeof(BDRVQcow2State)); 2201 options = qdict_clone_shallow(bs->options); 2202 2203 flags &= ~BDRV_O_INACTIVE; 2204 qemu_co_mutex_lock(&s->lock); 2205 ret = qcow2_do_open(bs, options, flags, &local_err); 2206 qemu_co_mutex_unlock(&s->lock); 2207 qobject_unref(options); 2208 if (local_err) { 2209 error_propagate(errp, local_err); 2210 error_prepend(errp, "Could not reopen qcow2 layer: "); 2211 bs->drv = NULL; 2212 return; 2213 } else if (ret < 0) { 2214 error_setg_errno(errp, -ret, "Could not reopen qcow2 layer"); 2215 bs->drv = NULL; 2216 return; 2217 } 2218 2219 s->crypto = crypto; 2220 } 2221 2222 static size_t header_ext_add(char *buf, uint32_t magic, const void *s, 2223 size_t len, size_t buflen) 2224 { 2225 QCowExtension *ext_backing_fmt = (QCowExtension*) buf; 2226 size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7); 2227 2228 if (buflen < ext_len) { 2229 return -ENOSPC; 2230 } 2231 2232 *ext_backing_fmt = (QCowExtension) { 2233 .magic = cpu_to_be32(magic), 2234 .len = cpu_to_be32(len), 2235 }; 2236 2237 if (len) { 2238 memcpy(buf + sizeof(QCowExtension), s, len); 2239 } 2240 2241 return ext_len; 2242 } 2243 2244 /* 2245 * Updates the qcow2 header, including the variable length parts of it, i.e. 2246 * the backing file name and all extensions. qcow2 was not designed to allow 2247 * such changes, so if we run out of space (we can only use the first cluster) 2248 * this function may fail. 2249 * 2250 * Returns 0 on success, -errno in error cases. 2251 */ 2252 int qcow2_update_header(BlockDriverState *bs) 2253 { 2254 BDRVQcow2State *s = bs->opaque; 2255 QCowHeader *header; 2256 char *buf; 2257 size_t buflen = s->cluster_size; 2258 int ret; 2259 uint64_t total_size; 2260 uint32_t refcount_table_clusters; 2261 size_t header_length; 2262 Qcow2UnknownHeaderExtension *uext; 2263 2264 buf = qemu_blockalign(bs, buflen); 2265 2266 /* Header structure */ 2267 header = (QCowHeader*) buf; 2268 2269 if (buflen < sizeof(*header)) { 2270 ret = -ENOSPC; 2271 goto fail; 2272 } 2273 2274 header_length = sizeof(*header) + s->unknown_header_fields_size; 2275 total_size = bs->total_sectors * BDRV_SECTOR_SIZE; 2276 refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); 2277 2278 *header = (QCowHeader) { 2279 /* Version 2 fields */ 2280 .magic = cpu_to_be32(QCOW_MAGIC), 2281 .version = cpu_to_be32(s->qcow_version), 2282 .backing_file_offset = 0, 2283 .backing_file_size = 0, 2284 .cluster_bits = cpu_to_be32(s->cluster_bits), 2285 .size = cpu_to_be64(total_size), 2286 .crypt_method = cpu_to_be32(s->crypt_method_header), 2287 .l1_size = cpu_to_be32(s->l1_size), 2288 .l1_table_offset = cpu_to_be64(s->l1_table_offset), 2289 .refcount_table_offset = cpu_to_be64(s->refcount_table_offset), 2290 .refcount_table_clusters = cpu_to_be32(refcount_table_clusters), 2291 .nb_snapshots = cpu_to_be32(s->nb_snapshots), 2292 .snapshots_offset = cpu_to_be64(s->snapshots_offset), 2293 2294 /* Version 3 fields */ 2295 .incompatible_features = cpu_to_be64(s->incompatible_features), 2296 .compatible_features = cpu_to_be64(s->compatible_features), 2297 .autoclear_features = cpu_to_be64(s->autoclear_features), 2298 .refcount_order = cpu_to_be32(s->refcount_order), 2299 .header_length = cpu_to_be32(header_length), 2300 }; 2301 2302 /* For older versions, write a shorter header */ 2303 switch (s->qcow_version) { 2304 case 2: 2305 ret = offsetof(QCowHeader, incompatible_features); 2306 break; 2307 case 3: 2308 ret = sizeof(*header); 2309 break; 2310 default: 2311 ret = -EINVAL; 2312 goto fail; 2313 } 2314 2315 buf += ret; 2316 buflen -= ret; 2317 memset(buf, 0, buflen); 2318 2319 /* Preserve any unknown field in the header */ 2320 if (s->unknown_header_fields_size) { 2321 if (buflen < s->unknown_header_fields_size) { 2322 ret = -ENOSPC; 2323 goto fail; 2324 } 2325 2326 memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size); 2327 buf += s->unknown_header_fields_size; 2328 buflen -= s->unknown_header_fields_size; 2329 } 2330 2331 /* Backing file format header extension */ 2332 if (s->image_backing_format) { 2333 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, 2334 s->image_backing_format, 2335 strlen(s->image_backing_format), 2336 buflen); 2337 if (ret < 0) { 2338 goto fail; 2339 } 2340 2341 buf += ret; 2342 buflen -= ret; 2343 } 2344 2345 /* Full disk encryption header pointer extension */ 2346 if (s->crypto_header.offset != 0) { 2347 cpu_to_be64s(&s->crypto_header.offset); 2348 cpu_to_be64s(&s->crypto_header.length); 2349 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER, 2350 &s->crypto_header, sizeof(s->crypto_header), 2351 buflen); 2352 be64_to_cpus(&s->crypto_header.offset); 2353 be64_to_cpus(&s->crypto_header.length); 2354 if (ret < 0) { 2355 goto fail; 2356 } 2357 buf += ret; 2358 buflen -= ret; 2359 } 2360 2361 /* Feature table */ 2362 if (s->qcow_version >= 3) { 2363 Qcow2Feature features[] = { 2364 { 2365 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 2366 .bit = QCOW2_INCOMPAT_DIRTY_BITNR, 2367 .name = "dirty bit", 2368 }, 2369 { 2370 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 2371 .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, 2372 .name = "corrupt bit", 2373 }, 2374 { 2375 .type = QCOW2_FEAT_TYPE_COMPATIBLE, 2376 .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, 2377 .name = "lazy refcounts", 2378 }, 2379 }; 2380 2381 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, 2382 features, sizeof(features), buflen); 2383 if (ret < 0) { 2384 goto fail; 2385 } 2386 buf += ret; 2387 buflen -= ret; 2388 } 2389 2390 /* Bitmap extension */ 2391 if (s->nb_bitmaps > 0) { 2392 Qcow2BitmapHeaderExt bitmaps_header = { 2393 .nb_bitmaps = cpu_to_be32(s->nb_bitmaps), 2394 .bitmap_directory_size = 2395 cpu_to_be64(s->bitmap_directory_size), 2396 .bitmap_directory_offset = 2397 cpu_to_be64(s->bitmap_directory_offset) 2398 }; 2399 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS, 2400 &bitmaps_header, sizeof(bitmaps_header), 2401 buflen); 2402 if (ret < 0) { 2403 goto fail; 2404 } 2405 buf += ret; 2406 buflen -= ret; 2407 } 2408 2409 /* Keep unknown header extensions */ 2410 QLIST_FOREACH(uext, &s->unknown_header_ext, next) { 2411 ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); 2412 if (ret < 0) { 2413 goto fail; 2414 } 2415 2416 buf += ret; 2417 buflen -= ret; 2418 } 2419 2420 /* End of header extensions */ 2421 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen); 2422 if (ret < 0) { 2423 goto fail; 2424 } 2425 2426 buf += ret; 2427 buflen -= ret; 2428 2429 /* Backing file name */ 2430 if (s->image_backing_file) { 2431 size_t backing_file_len = strlen(s->image_backing_file); 2432 2433 if (buflen < backing_file_len) { 2434 ret = -ENOSPC; 2435 goto fail; 2436 } 2437 2438 /* Using strncpy is ok here, since buf is not NUL-terminated. */ 2439 strncpy(buf, s->image_backing_file, buflen); 2440 2441 header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); 2442 header->backing_file_size = cpu_to_be32(backing_file_len); 2443 } 2444 2445 /* Write the new header */ 2446 ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size); 2447 if (ret < 0) { 2448 goto fail; 2449 } 2450 2451 ret = 0; 2452 fail: 2453 qemu_vfree(header); 2454 return ret; 2455 } 2456 2457 static int qcow2_change_backing_file(BlockDriverState *bs, 2458 const char *backing_file, const char *backing_fmt) 2459 { 2460 BDRVQcow2State *s = bs->opaque; 2461 2462 if (backing_file && strlen(backing_file) > 1023) { 2463 return -EINVAL; 2464 } 2465 2466 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2467 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2468 2469 g_free(s->image_backing_file); 2470 g_free(s->image_backing_format); 2471 2472 s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL; 2473 s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL; 2474 2475 return qcow2_update_header(bs); 2476 } 2477 2478 static int qcow2_crypt_method_from_format(const char *encryptfmt) 2479 { 2480 if (g_str_equal(encryptfmt, "luks")) { 2481 return QCOW_CRYPT_LUKS; 2482 } else if (g_str_equal(encryptfmt, "aes")) { 2483 return QCOW_CRYPT_AES; 2484 } else { 2485 return -EINVAL; 2486 } 2487 } 2488 2489 static int qcow2_set_up_encryption(BlockDriverState *bs, 2490 QCryptoBlockCreateOptions *cryptoopts, 2491 Error **errp) 2492 { 2493 BDRVQcow2State *s = bs->opaque; 2494 QCryptoBlock *crypto = NULL; 2495 int fmt, ret; 2496 2497 switch (cryptoopts->format) { 2498 case Q_CRYPTO_BLOCK_FORMAT_LUKS: 2499 fmt = QCOW_CRYPT_LUKS; 2500 break; 2501 case Q_CRYPTO_BLOCK_FORMAT_QCOW: 2502 fmt = QCOW_CRYPT_AES; 2503 break; 2504 default: 2505 error_setg(errp, "Crypto format not supported in qcow2"); 2506 return -EINVAL; 2507 } 2508 2509 s->crypt_method_header = fmt; 2510 2511 crypto = qcrypto_block_create(cryptoopts, "encrypt.", 2512 qcow2_crypto_hdr_init_func, 2513 qcow2_crypto_hdr_write_func, 2514 bs, errp); 2515 if (!crypto) { 2516 return -EINVAL; 2517 } 2518 2519 ret = qcow2_update_header(bs); 2520 if (ret < 0) { 2521 error_setg_errno(errp, -ret, "Could not write encryption header"); 2522 goto out; 2523 } 2524 2525 ret = 0; 2526 out: 2527 qcrypto_block_free(crypto); 2528 return ret; 2529 } 2530 2531 /** 2532 * Preallocates metadata structures for data clusters between @offset (in the 2533 * guest disk) and @new_length (which is thus generally the new guest disk 2534 * size). 2535 * 2536 * Returns: 0 on success, -errno on failure. 2537 */ 2538 static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, 2539 uint64_t new_length) 2540 { 2541 uint64_t bytes; 2542 uint64_t host_offset = 0; 2543 unsigned int cur_bytes; 2544 int ret; 2545 QCowL2Meta *meta; 2546 2547 assert(offset <= new_length); 2548 bytes = new_length - offset; 2549 2550 while (bytes) { 2551 cur_bytes = MIN(bytes, INT_MAX); 2552 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, 2553 &host_offset, &meta); 2554 if (ret < 0) { 2555 return ret; 2556 } 2557 2558 while (meta) { 2559 QCowL2Meta *next = meta->next; 2560 2561 ret = qcow2_alloc_cluster_link_l2(bs, meta); 2562 if (ret < 0) { 2563 qcow2_free_any_clusters(bs, meta->alloc_offset, 2564 meta->nb_clusters, QCOW2_DISCARD_NEVER); 2565 return ret; 2566 } 2567 2568 /* There are no dependent requests, but we need to remove our 2569 * request from the list of in-flight requests */ 2570 QLIST_REMOVE(meta, next_in_flight); 2571 2572 g_free(meta); 2573 meta = next; 2574 } 2575 2576 /* TODO Preallocate data if requested */ 2577 2578 bytes -= cur_bytes; 2579 offset += cur_bytes; 2580 } 2581 2582 /* 2583 * It is expected that the image file is large enough to actually contain 2584 * all of the allocated clusters (otherwise we get failing reads after 2585 * EOF). Extend the image to the last allocated sector. 2586 */ 2587 if (host_offset != 0) { 2588 uint8_t data = 0; 2589 ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1, 2590 &data, 1); 2591 if (ret < 0) { 2592 return ret; 2593 } 2594 } 2595 2596 return 0; 2597 } 2598 2599 /* qcow2_refcount_metadata_size: 2600 * @clusters: number of clusters to refcount (including data and L1/L2 tables) 2601 * @cluster_size: size of a cluster, in bytes 2602 * @refcount_order: refcount bits power-of-2 exponent 2603 * @generous_increase: allow for the refcount table to be 1.5x as large as it 2604 * needs to be 2605 * 2606 * Returns: Number of bytes required for refcount blocks and table metadata. 2607 */ 2608 int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, 2609 int refcount_order, bool generous_increase, 2610 uint64_t *refblock_count) 2611 { 2612 /* 2613 * Every host cluster is reference-counted, including metadata (even 2614 * refcount metadata is recursively included). 2615 * 2616 * An accurate formula for the size of refcount metadata size is difficult 2617 * to derive. An easier method of calculation is finding the fixed point 2618 * where no further refcount blocks or table clusters are required to 2619 * reference count every cluster. 2620 */ 2621 int64_t blocks_per_table_cluster = cluster_size / sizeof(uint64_t); 2622 int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order); 2623 int64_t table = 0; /* number of refcount table clusters */ 2624 int64_t blocks = 0; /* number of refcount block clusters */ 2625 int64_t last; 2626 int64_t n = 0; 2627 2628 do { 2629 last = n; 2630 blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block); 2631 table = DIV_ROUND_UP(blocks, blocks_per_table_cluster); 2632 n = clusters + blocks + table; 2633 2634 if (n == last && generous_increase) { 2635 clusters += DIV_ROUND_UP(table, 2); 2636 n = 0; /* force another loop */ 2637 generous_increase = false; 2638 } 2639 } while (n != last); 2640 2641 if (refblock_count) { 2642 *refblock_count = blocks; 2643 } 2644 2645 return (blocks + table) * cluster_size; 2646 } 2647 2648 /** 2649 * qcow2_calc_prealloc_size: 2650 * @total_size: virtual disk size in bytes 2651 * @cluster_size: cluster size in bytes 2652 * @refcount_order: refcount bits power-of-2 exponent 2653 * 2654 * Returns: Total number of bytes required for the fully allocated image 2655 * (including metadata). 2656 */ 2657 static int64_t qcow2_calc_prealloc_size(int64_t total_size, 2658 size_t cluster_size, 2659 int refcount_order) 2660 { 2661 int64_t meta_size = 0; 2662 uint64_t nl1e, nl2e; 2663 int64_t aligned_total_size = ROUND_UP(total_size, cluster_size); 2664 2665 /* header: 1 cluster */ 2666 meta_size += cluster_size; 2667 2668 /* total size of L2 tables */ 2669 nl2e = aligned_total_size / cluster_size; 2670 nl2e = ROUND_UP(nl2e, cluster_size / sizeof(uint64_t)); 2671 meta_size += nl2e * sizeof(uint64_t); 2672 2673 /* total size of L1 tables */ 2674 nl1e = nl2e * sizeof(uint64_t) / cluster_size; 2675 nl1e = ROUND_UP(nl1e, cluster_size / sizeof(uint64_t)); 2676 meta_size += nl1e * sizeof(uint64_t); 2677 2678 /* total size of refcount table and blocks */ 2679 meta_size += qcow2_refcount_metadata_size( 2680 (meta_size + aligned_total_size) / cluster_size, 2681 cluster_size, refcount_order, false, NULL); 2682 2683 return meta_size + aligned_total_size; 2684 } 2685 2686 static bool validate_cluster_size(size_t cluster_size, Error **errp) 2687 { 2688 int cluster_bits = ctz32(cluster_size); 2689 if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || 2690 (1 << cluster_bits) != cluster_size) 2691 { 2692 error_setg(errp, "Cluster size must be a power of two between %d and " 2693 "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); 2694 return false; 2695 } 2696 return true; 2697 } 2698 2699 static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp) 2700 { 2701 size_t cluster_size; 2702 2703 cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 2704 DEFAULT_CLUSTER_SIZE); 2705 if (!validate_cluster_size(cluster_size, errp)) { 2706 return 0; 2707 } 2708 return cluster_size; 2709 } 2710 2711 static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp) 2712 { 2713 char *buf; 2714 int ret; 2715 2716 buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL); 2717 if (!buf) { 2718 ret = 3; /* default */ 2719 } else if (!strcmp(buf, "0.10")) { 2720 ret = 2; 2721 } else if (!strcmp(buf, "1.1")) { 2722 ret = 3; 2723 } else { 2724 error_setg(errp, "Invalid compatibility level: '%s'", buf); 2725 ret = -EINVAL; 2726 } 2727 g_free(buf); 2728 return ret; 2729 } 2730 2731 static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version, 2732 Error **errp) 2733 { 2734 uint64_t refcount_bits; 2735 2736 refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16); 2737 if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) { 2738 error_setg(errp, "Refcount width must be a power of two and may not " 2739 "exceed 64 bits"); 2740 return 0; 2741 } 2742 2743 if (version < 3 && refcount_bits != 16) { 2744 error_setg(errp, "Different refcount widths than 16 bits require " 2745 "compatibility level 1.1 or above (use compat=1.1 or " 2746 "greater)"); 2747 return 0; 2748 } 2749 2750 return refcount_bits; 2751 } 2752 2753 static int coroutine_fn 2754 qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) 2755 { 2756 BlockdevCreateOptionsQcow2 *qcow2_opts; 2757 QDict *options; 2758 2759 /* 2760 * Open the image file and write a minimal qcow2 header. 2761 * 2762 * We keep things simple and start with a zero-sized image. We also 2763 * do without refcount blocks or a L1 table for now. We'll fix the 2764 * inconsistency later. 2765 * 2766 * We do need a refcount table because growing the refcount table means 2767 * allocating two new refcount blocks - the seconds of which would be at 2768 * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file 2769 * size for any qcow2 image. 2770 */ 2771 BlockBackend *blk = NULL; 2772 BlockDriverState *bs = NULL; 2773 QCowHeader *header; 2774 size_t cluster_size; 2775 int version; 2776 int refcount_order; 2777 uint64_t* refcount_table; 2778 Error *local_err = NULL; 2779 int ret; 2780 2781 assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2); 2782 qcow2_opts = &create_options->u.qcow2; 2783 2784 bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp); 2785 if (bs == NULL) { 2786 return -EIO; 2787 } 2788 2789 /* Validate options and set default values */ 2790 if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) { 2791 error_setg(errp, "Image size must be a multiple of 512 bytes"); 2792 ret = -EINVAL; 2793 goto out; 2794 } 2795 2796 if (qcow2_opts->has_version) { 2797 switch (qcow2_opts->version) { 2798 case BLOCKDEV_QCOW2_VERSION_V2: 2799 version = 2; 2800 break; 2801 case BLOCKDEV_QCOW2_VERSION_V3: 2802 version = 3; 2803 break; 2804 default: 2805 g_assert_not_reached(); 2806 } 2807 } else { 2808 version = 3; 2809 } 2810 2811 if (qcow2_opts->has_cluster_size) { 2812 cluster_size = qcow2_opts->cluster_size; 2813 } else { 2814 cluster_size = DEFAULT_CLUSTER_SIZE; 2815 } 2816 2817 if (!validate_cluster_size(cluster_size, errp)) { 2818 ret = -EINVAL; 2819 goto out; 2820 } 2821 2822 if (!qcow2_opts->has_preallocation) { 2823 qcow2_opts->preallocation = PREALLOC_MODE_OFF; 2824 } 2825 if (qcow2_opts->has_backing_file && 2826 qcow2_opts->preallocation != PREALLOC_MODE_OFF) 2827 { 2828 error_setg(errp, "Backing file and preallocation cannot be used at " 2829 "the same time"); 2830 ret = -EINVAL; 2831 goto out; 2832 } 2833 if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) { 2834 error_setg(errp, "Backing format cannot be used without backing file"); 2835 ret = -EINVAL; 2836 goto out; 2837 } 2838 2839 if (!qcow2_opts->has_lazy_refcounts) { 2840 qcow2_opts->lazy_refcounts = false; 2841 } 2842 if (version < 3 && qcow2_opts->lazy_refcounts) { 2843 error_setg(errp, "Lazy refcounts only supported with compatibility " 2844 "level 1.1 and above (use version=v3 or greater)"); 2845 ret = -EINVAL; 2846 goto out; 2847 } 2848 2849 if (!qcow2_opts->has_refcount_bits) { 2850 qcow2_opts->refcount_bits = 16; 2851 } 2852 if (qcow2_opts->refcount_bits > 64 || 2853 !is_power_of_2(qcow2_opts->refcount_bits)) 2854 { 2855 error_setg(errp, "Refcount width must be a power of two and may not " 2856 "exceed 64 bits"); 2857 ret = -EINVAL; 2858 goto out; 2859 } 2860 if (version < 3 && qcow2_opts->refcount_bits != 16) { 2861 error_setg(errp, "Different refcount widths than 16 bits require " 2862 "compatibility level 1.1 or above (use version=v3 or " 2863 "greater)"); 2864 ret = -EINVAL; 2865 goto out; 2866 } 2867 refcount_order = ctz32(qcow2_opts->refcount_bits); 2868 2869 2870 /* Create BlockBackend to write to the image */ 2871 blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); 2872 ret = blk_insert_bs(blk, bs, errp); 2873 if (ret < 0) { 2874 goto out; 2875 } 2876 blk_set_allow_write_beyond_eof(blk, true); 2877 2878 /* Clear the protocol layer and preallocate it if necessary */ 2879 ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp); 2880 if (ret < 0) { 2881 goto out; 2882 } 2883 2884 if (qcow2_opts->preallocation == PREALLOC_MODE_FULL || 2885 qcow2_opts->preallocation == PREALLOC_MODE_FALLOC) 2886 { 2887 int64_t prealloc_size = 2888 qcow2_calc_prealloc_size(qcow2_opts->size, cluster_size, 2889 refcount_order); 2890 2891 ret = blk_truncate(blk, prealloc_size, qcow2_opts->preallocation, errp); 2892 if (ret < 0) { 2893 goto out; 2894 } 2895 } 2896 2897 /* Write the header */ 2898 QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); 2899 header = g_malloc0(cluster_size); 2900 *header = (QCowHeader) { 2901 .magic = cpu_to_be32(QCOW_MAGIC), 2902 .version = cpu_to_be32(version), 2903 .cluster_bits = cpu_to_be32(ctz32(cluster_size)), 2904 .size = cpu_to_be64(0), 2905 .l1_table_offset = cpu_to_be64(0), 2906 .l1_size = cpu_to_be32(0), 2907 .refcount_table_offset = cpu_to_be64(cluster_size), 2908 .refcount_table_clusters = cpu_to_be32(1), 2909 .refcount_order = cpu_to_be32(refcount_order), 2910 .header_length = cpu_to_be32(sizeof(*header)), 2911 }; 2912 2913 /* We'll update this to correct value later */ 2914 header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 2915 2916 if (qcow2_opts->lazy_refcounts) { 2917 header->compatible_features |= 2918 cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); 2919 } 2920 2921 ret = blk_pwrite(blk, 0, header, cluster_size, 0); 2922 g_free(header); 2923 if (ret < 0) { 2924 error_setg_errno(errp, -ret, "Could not write qcow2 header"); 2925 goto out; 2926 } 2927 2928 /* Write a refcount table with one refcount block */ 2929 refcount_table = g_malloc0(2 * cluster_size); 2930 refcount_table[0] = cpu_to_be64(2 * cluster_size); 2931 ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0); 2932 g_free(refcount_table); 2933 2934 if (ret < 0) { 2935 error_setg_errno(errp, -ret, "Could not write refcount table"); 2936 goto out; 2937 } 2938 2939 blk_unref(blk); 2940 blk = NULL; 2941 2942 /* 2943 * And now open the image and make it consistent first (i.e. increase the 2944 * refcount of the cluster that is occupied by the header and the refcount 2945 * table) 2946 */ 2947 options = qdict_new(); 2948 qdict_put_str(options, "driver", "qcow2"); 2949 qdict_put_str(options, "file", bs->node_name); 2950 blk = blk_new_open(NULL, NULL, options, 2951 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, 2952 &local_err); 2953 if (blk == NULL) { 2954 error_propagate(errp, local_err); 2955 ret = -EIO; 2956 goto out; 2957 } 2958 2959 ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); 2960 if (ret < 0) { 2961 error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " 2962 "header and refcount table"); 2963 goto out; 2964 2965 } else if (ret != 0) { 2966 error_report("Huh, first cluster in empty image is already in use?"); 2967 abort(); 2968 } 2969 2970 /* Create a full header (including things like feature table) */ 2971 ret = qcow2_update_header(blk_bs(blk)); 2972 if (ret < 0) { 2973 error_setg_errno(errp, -ret, "Could not update qcow2 header"); 2974 goto out; 2975 } 2976 2977 /* Okay, now that we have a valid image, let's give it the right size */ 2978 ret = blk_truncate(blk, qcow2_opts->size, PREALLOC_MODE_OFF, errp); 2979 if (ret < 0) { 2980 error_prepend(errp, "Could not resize image: "); 2981 goto out; 2982 } 2983 2984 /* Want a backing file? There you go.*/ 2985 if (qcow2_opts->has_backing_file) { 2986 const char *backing_format = NULL; 2987 2988 if (qcow2_opts->has_backing_fmt) { 2989 backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt); 2990 } 2991 2992 ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file, 2993 backing_format); 2994 if (ret < 0) { 2995 error_setg_errno(errp, -ret, "Could not assign backing file '%s' " 2996 "with format '%s'", qcow2_opts->backing_file, 2997 backing_format); 2998 goto out; 2999 } 3000 } 3001 3002 /* Want encryption? There you go. */ 3003 if (qcow2_opts->has_encrypt) { 3004 ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp); 3005 if (ret < 0) { 3006 goto out; 3007 } 3008 } 3009 3010 /* And if we're supposed to preallocate metadata, do that now */ 3011 if (qcow2_opts->preallocation != PREALLOC_MODE_OFF) { 3012 BDRVQcow2State *s = blk_bs(blk)->opaque; 3013 qemu_co_mutex_lock(&s->lock); 3014 ret = preallocate_co(blk_bs(blk), 0, qcow2_opts->size); 3015 qemu_co_mutex_unlock(&s->lock); 3016 3017 if (ret < 0) { 3018 error_setg_errno(errp, -ret, "Could not preallocate metadata"); 3019 goto out; 3020 } 3021 } 3022 3023 blk_unref(blk); 3024 blk = NULL; 3025 3026 /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. 3027 * Using BDRV_O_NO_IO, since encryption is now setup we don't want to 3028 * have to setup decryption context. We're not doing any I/O on the top 3029 * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does 3030 * not have effect. 3031 */ 3032 options = qdict_new(); 3033 qdict_put_str(options, "driver", "qcow2"); 3034 qdict_put_str(options, "file", bs->node_name); 3035 blk = blk_new_open(NULL, NULL, options, 3036 BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO, 3037 &local_err); 3038 if (blk == NULL) { 3039 error_propagate(errp, local_err); 3040 ret = -EIO; 3041 goto out; 3042 } 3043 3044 ret = 0; 3045 out: 3046 blk_unref(blk); 3047 bdrv_unref(bs); 3048 return ret; 3049 } 3050 3051 static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts, 3052 Error **errp) 3053 { 3054 BlockdevCreateOptions *create_options = NULL; 3055 QDict *qdict; 3056 Visitor *v; 3057 BlockDriverState *bs = NULL; 3058 Error *local_err = NULL; 3059 const char *val; 3060 int ret; 3061 3062 /* Only the keyval visitor supports the dotted syntax needed for 3063 * encryption, so go through a QDict before getting a QAPI type. Ignore 3064 * options meant for the protocol layer so that the visitor doesn't 3065 * complain. */ 3066 qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts, 3067 true); 3068 3069 /* Handle encryption options */ 3070 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT); 3071 if (val && !strcmp(val, "on")) { 3072 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow"); 3073 } else if (val && !strcmp(val, "off")) { 3074 qdict_del(qdict, BLOCK_OPT_ENCRYPT); 3075 } 3076 3077 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT); 3078 if (val && !strcmp(val, "aes")) { 3079 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow"); 3080 } 3081 3082 /* Convert compat=0.10/1.1 into compat=v2/v3, to be renamed into 3083 * version=v2/v3 below. */ 3084 val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL); 3085 if (val && !strcmp(val, "0.10")) { 3086 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2"); 3087 } else if (val && !strcmp(val, "1.1")) { 3088 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3"); 3089 } 3090 3091 /* Change legacy command line options into QMP ones */ 3092 static const QDictRenames opt_renames[] = { 3093 { BLOCK_OPT_BACKING_FILE, "backing-file" }, 3094 { BLOCK_OPT_BACKING_FMT, "backing-fmt" }, 3095 { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" }, 3096 { BLOCK_OPT_LAZY_REFCOUNTS, "lazy-refcounts" }, 3097 { BLOCK_OPT_REFCOUNT_BITS, "refcount-bits" }, 3098 { BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT }, 3099 { BLOCK_OPT_COMPAT_LEVEL, "version" }, 3100 { NULL, NULL }, 3101 }; 3102 3103 if (!qdict_rename_keys(qdict, opt_renames, errp)) { 3104 ret = -EINVAL; 3105 goto finish; 3106 } 3107 3108 /* Create and open the file (protocol layer) */ 3109 ret = bdrv_create_file(filename, opts, errp); 3110 if (ret < 0) { 3111 goto finish; 3112 } 3113 3114 bs = bdrv_open(filename, NULL, NULL, 3115 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); 3116 if (bs == NULL) { 3117 ret = -EIO; 3118 goto finish; 3119 } 3120 3121 /* Set 'driver' and 'node' options */ 3122 qdict_put_str(qdict, "driver", "qcow2"); 3123 qdict_put_str(qdict, "file", bs->node_name); 3124 3125 /* Now get the QAPI type BlockdevCreateOptions */ 3126 v = qobject_input_visitor_new_flat_confused(qdict, errp); 3127 if (!v) { 3128 ret = -EINVAL; 3129 goto finish; 3130 } 3131 3132 visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err); 3133 visit_free(v); 3134 3135 if (local_err) { 3136 error_propagate(errp, local_err); 3137 ret = -EINVAL; 3138 goto finish; 3139 } 3140 3141 /* Silently round up size */ 3142 create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size, 3143 BDRV_SECTOR_SIZE); 3144 3145 /* Create the qcow2 image (format layer) */ 3146 ret = qcow2_co_create(create_options, errp); 3147 if (ret < 0) { 3148 goto finish; 3149 } 3150 3151 ret = 0; 3152 finish: 3153 qobject_unref(qdict); 3154 bdrv_unref(bs); 3155 qapi_free_BlockdevCreateOptions(create_options); 3156 return ret; 3157 } 3158 3159 3160 static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) 3161 { 3162 int64_t nr; 3163 int res; 3164 3165 /* Clamp to image length, before checking status of underlying sectors */ 3166 if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { 3167 bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset; 3168 } 3169 3170 if (!bytes) { 3171 return true; 3172 } 3173 res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL); 3174 return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes; 3175 } 3176 3177 static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, 3178 int64_t offset, int bytes, BdrvRequestFlags flags) 3179 { 3180 int ret; 3181 BDRVQcow2State *s = bs->opaque; 3182 3183 uint32_t head = offset % s->cluster_size; 3184 uint32_t tail = (offset + bytes) % s->cluster_size; 3185 3186 trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes); 3187 if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) { 3188 tail = 0; 3189 } 3190 3191 if (head || tail) { 3192 uint64_t off; 3193 unsigned int nr; 3194 3195 assert(head + bytes <= s->cluster_size); 3196 3197 /* check whether remainder of cluster already reads as zero */ 3198 if (!(is_zero(bs, offset - head, head) && 3199 is_zero(bs, offset + bytes, 3200 tail ? s->cluster_size - tail : 0))) { 3201 return -ENOTSUP; 3202 } 3203 3204 qemu_co_mutex_lock(&s->lock); 3205 /* We can have new write after previous check */ 3206 offset = QEMU_ALIGN_DOWN(offset, s->cluster_size); 3207 bytes = s->cluster_size; 3208 nr = s->cluster_size; 3209 ret = qcow2_get_cluster_offset(bs, offset, &nr, &off); 3210 if (ret != QCOW2_CLUSTER_UNALLOCATED && 3211 ret != QCOW2_CLUSTER_ZERO_PLAIN && 3212 ret != QCOW2_CLUSTER_ZERO_ALLOC) { 3213 qemu_co_mutex_unlock(&s->lock); 3214 return -ENOTSUP; 3215 } 3216 } else { 3217 qemu_co_mutex_lock(&s->lock); 3218 } 3219 3220 trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes); 3221 3222 /* Whatever is left can use real zero clusters */ 3223 ret = qcow2_cluster_zeroize(bs, offset, bytes, flags); 3224 qemu_co_mutex_unlock(&s->lock); 3225 3226 return ret; 3227 } 3228 3229 static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, 3230 int64_t offset, int bytes) 3231 { 3232 int ret; 3233 BDRVQcow2State *s = bs->opaque; 3234 3235 if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) { 3236 assert(bytes < s->cluster_size); 3237 /* Ignore partial clusters, except for the special case of the 3238 * complete partial cluster at the end of an unaligned file */ 3239 if (!QEMU_IS_ALIGNED(offset, s->cluster_size) || 3240 offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) { 3241 return -ENOTSUP; 3242 } 3243 } 3244 3245 qemu_co_mutex_lock(&s->lock); 3246 ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST, 3247 false); 3248 qemu_co_mutex_unlock(&s->lock); 3249 return ret; 3250 } 3251 3252 static int coroutine_fn 3253 qcow2_co_copy_range_from(BlockDriverState *bs, 3254 BdrvChild *src, uint64_t src_offset, 3255 BdrvChild *dst, uint64_t dst_offset, 3256 uint64_t bytes, BdrvRequestFlags flags) 3257 { 3258 BDRVQcow2State *s = bs->opaque; 3259 int ret; 3260 unsigned int cur_bytes; /* number of bytes in current iteration */ 3261 BdrvChild *child = NULL; 3262 BdrvRequestFlags cur_flags; 3263 3264 assert(!bs->encrypted); 3265 qemu_co_mutex_lock(&s->lock); 3266 3267 while (bytes != 0) { 3268 uint64_t copy_offset = 0; 3269 /* prepare next request */ 3270 cur_bytes = MIN(bytes, INT_MAX); 3271 cur_flags = flags; 3272 3273 ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, ©_offset); 3274 if (ret < 0) { 3275 goto out; 3276 } 3277 3278 switch (ret) { 3279 case QCOW2_CLUSTER_UNALLOCATED: 3280 if (bs->backing && bs->backing->bs) { 3281 int64_t backing_length = bdrv_getlength(bs->backing->bs); 3282 if (src_offset >= backing_length) { 3283 cur_flags |= BDRV_REQ_ZERO_WRITE; 3284 } else { 3285 child = bs->backing; 3286 cur_bytes = MIN(cur_bytes, backing_length - src_offset); 3287 copy_offset = src_offset; 3288 } 3289 } else { 3290 cur_flags |= BDRV_REQ_ZERO_WRITE; 3291 } 3292 break; 3293 3294 case QCOW2_CLUSTER_ZERO_PLAIN: 3295 case QCOW2_CLUSTER_ZERO_ALLOC: 3296 cur_flags |= BDRV_REQ_ZERO_WRITE; 3297 break; 3298 3299 case QCOW2_CLUSTER_COMPRESSED: 3300 ret = -ENOTSUP; 3301 goto out; 3302 break; 3303 3304 case QCOW2_CLUSTER_NORMAL: 3305 child = bs->file; 3306 copy_offset += offset_into_cluster(s, src_offset); 3307 if ((copy_offset & 511) != 0) { 3308 ret = -EIO; 3309 goto out; 3310 } 3311 break; 3312 3313 default: 3314 abort(); 3315 } 3316 qemu_co_mutex_unlock(&s->lock); 3317 ret = bdrv_co_copy_range_from(child, 3318 copy_offset, 3319 dst, dst_offset, 3320 cur_bytes, cur_flags); 3321 qemu_co_mutex_lock(&s->lock); 3322 if (ret < 0) { 3323 goto out; 3324 } 3325 3326 bytes -= cur_bytes; 3327 src_offset += cur_bytes; 3328 dst_offset += cur_bytes; 3329 } 3330 ret = 0; 3331 3332 out: 3333 qemu_co_mutex_unlock(&s->lock); 3334 return ret; 3335 } 3336 3337 static int coroutine_fn 3338 qcow2_co_copy_range_to(BlockDriverState *bs, 3339 BdrvChild *src, uint64_t src_offset, 3340 BdrvChild *dst, uint64_t dst_offset, 3341 uint64_t bytes, BdrvRequestFlags flags) 3342 { 3343 BDRVQcow2State *s = bs->opaque; 3344 int offset_in_cluster; 3345 int ret; 3346 unsigned int cur_bytes; /* number of sectors in current iteration */ 3347 uint64_t cluster_offset; 3348 uint8_t *cluster_data = NULL; 3349 QCowL2Meta *l2meta = NULL; 3350 3351 assert(!bs->encrypted); 3352 s->cluster_cache_offset = -1; /* disable compressed cache */ 3353 3354 qemu_co_mutex_lock(&s->lock); 3355 3356 while (bytes != 0) { 3357 3358 l2meta = NULL; 3359 3360 offset_in_cluster = offset_into_cluster(s, dst_offset); 3361 cur_bytes = MIN(bytes, INT_MAX); 3362 3363 /* TODO: 3364 * If src->bs == dst->bs, we could simply copy by incrementing 3365 * the refcnt, without copying user data. 3366 * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. */ 3367 ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes, 3368 &cluster_offset, &l2meta); 3369 if (ret < 0) { 3370 goto fail; 3371 } 3372 3373 assert((cluster_offset & 511) == 0); 3374 3375 ret = qcow2_pre_write_overlap_check(bs, 0, 3376 cluster_offset + offset_in_cluster, cur_bytes); 3377 if (ret < 0) { 3378 goto fail; 3379 } 3380 3381 qemu_co_mutex_unlock(&s->lock); 3382 ret = bdrv_co_copy_range_to(src, src_offset, 3383 bs->file, 3384 cluster_offset + offset_in_cluster, 3385 cur_bytes, flags); 3386 qemu_co_mutex_lock(&s->lock); 3387 if (ret < 0) { 3388 goto fail; 3389 } 3390 3391 ret = qcow2_handle_l2meta(bs, &l2meta, true); 3392 if (ret) { 3393 goto fail; 3394 } 3395 3396 bytes -= cur_bytes; 3397 src_offset += cur_bytes; 3398 dst_offset += cur_bytes; 3399 } 3400 ret = 0; 3401 3402 fail: 3403 qcow2_handle_l2meta(bs, &l2meta, false); 3404 3405 qemu_co_mutex_unlock(&s->lock); 3406 3407 qemu_vfree(cluster_data); 3408 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); 3409 3410 return ret; 3411 } 3412 3413 static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, 3414 PreallocMode prealloc, Error **errp) 3415 { 3416 BDRVQcow2State *s = bs->opaque; 3417 uint64_t old_length; 3418 int64_t new_l1_size; 3419 int ret; 3420 3421 if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA && 3422 prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL) 3423 { 3424 error_setg(errp, "Unsupported preallocation mode '%s'", 3425 PreallocMode_str(prealloc)); 3426 return -ENOTSUP; 3427 } 3428 3429 if (offset & 511) { 3430 error_setg(errp, "The new size must be a multiple of 512"); 3431 return -EINVAL; 3432 } 3433 3434 qemu_co_mutex_lock(&s->lock); 3435 3436 /* cannot proceed if image has snapshots */ 3437 if (s->nb_snapshots) { 3438 error_setg(errp, "Can't resize an image which has snapshots"); 3439 ret = -ENOTSUP; 3440 goto fail; 3441 } 3442 3443 /* cannot proceed if image has bitmaps */ 3444 if (s->nb_bitmaps) { 3445 /* TODO: resize bitmaps in the image */ 3446 error_setg(errp, "Can't resize an image which has bitmaps"); 3447 ret = -ENOTSUP; 3448 goto fail; 3449 } 3450 3451 old_length = bs->total_sectors * 512; 3452 new_l1_size = size_to_l1(s, offset); 3453 3454 if (offset < old_length) { 3455 int64_t last_cluster, old_file_size; 3456 if (prealloc != PREALLOC_MODE_OFF) { 3457 error_setg(errp, 3458 "Preallocation can't be used for shrinking an image"); 3459 ret = -EINVAL; 3460 goto fail; 3461 } 3462 3463 ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size), 3464 old_length - ROUND_UP(offset, 3465 s->cluster_size), 3466 QCOW2_DISCARD_ALWAYS, true); 3467 if (ret < 0) { 3468 error_setg_errno(errp, -ret, "Failed to discard cropped clusters"); 3469 goto fail; 3470 } 3471 3472 ret = qcow2_shrink_l1_table(bs, new_l1_size); 3473 if (ret < 0) { 3474 error_setg_errno(errp, -ret, 3475 "Failed to reduce the number of L2 tables"); 3476 goto fail; 3477 } 3478 3479 ret = qcow2_shrink_reftable(bs); 3480 if (ret < 0) { 3481 error_setg_errno(errp, -ret, 3482 "Failed to discard unused refblocks"); 3483 goto fail; 3484 } 3485 3486 old_file_size = bdrv_getlength(bs->file->bs); 3487 if (old_file_size < 0) { 3488 error_setg_errno(errp, -old_file_size, 3489 "Failed to inquire current file length"); 3490 ret = old_file_size; 3491 goto fail; 3492 } 3493 last_cluster = qcow2_get_last_cluster(bs, old_file_size); 3494 if (last_cluster < 0) { 3495 error_setg_errno(errp, -last_cluster, 3496 "Failed to find the last cluster"); 3497 ret = last_cluster; 3498 goto fail; 3499 } 3500 if ((last_cluster + 1) * s->cluster_size < old_file_size) { 3501 Error *local_err = NULL; 3502 3503 bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, 3504 PREALLOC_MODE_OFF, &local_err); 3505 if (local_err) { 3506 warn_reportf_err(local_err, 3507 "Failed to truncate the tail of the image: "); 3508 } 3509 } 3510 } else { 3511 ret = qcow2_grow_l1_table(bs, new_l1_size, true); 3512 if (ret < 0) { 3513 error_setg_errno(errp, -ret, "Failed to grow the L1 table"); 3514 goto fail; 3515 } 3516 } 3517 3518 switch (prealloc) { 3519 case PREALLOC_MODE_OFF: 3520 break; 3521 3522 case PREALLOC_MODE_METADATA: 3523 ret = preallocate_co(bs, old_length, offset); 3524 if (ret < 0) { 3525 error_setg_errno(errp, -ret, "Preallocation failed"); 3526 goto fail; 3527 } 3528 break; 3529 3530 case PREALLOC_MODE_FALLOC: 3531 case PREALLOC_MODE_FULL: 3532 { 3533 int64_t allocation_start, host_offset, guest_offset; 3534 int64_t clusters_allocated; 3535 int64_t old_file_size, new_file_size; 3536 uint64_t nb_new_data_clusters, nb_new_l2_tables; 3537 3538 old_file_size = bdrv_getlength(bs->file->bs); 3539 if (old_file_size < 0) { 3540 error_setg_errno(errp, -old_file_size, 3541 "Failed to inquire current file length"); 3542 ret = old_file_size; 3543 goto fail; 3544 } 3545 old_file_size = ROUND_UP(old_file_size, s->cluster_size); 3546 3547 nb_new_data_clusters = DIV_ROUND_UP(offset - old_length, 3548 s->cluster_size); 3549 3550 /* This is an overestimation; we will not actually allocate space for 3551 * these in the file but just make sure the new refcount structures are 3552 * able to cover them so we will not have to allocate new refblocks 3553 * while entering the data blocks in the potentially new L2 tables. 3554 * (We do not actually care where the L2 tables are placed. Maybe they 3555 * are already allocated or they can be placed somewhere before 3556 * @old_file_size. It does not matter because they will be fully 3557 * allocated automatically, so they do not need to be covered by the 3558 * preallocation. All that matters is that we will not have to allocate 3559 * new refcount structures for them.) */ 3560 nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters, 3561 s->cluster_size / sizeof(uint64_t)); 3562 /* The cluster range may not be aligned to L2 boundaries, so add one L2 3563 * table for a potential head/tail */ 3564 nb_new_l2_tables++; 3565 3566 allocation_start = qcow2_refcount_area(bs, old_file_size, 3567 nb_new_data_clusters + 3568 nb_new_l2_tables, 3569 true, 0, 0); 3570 if (allocation_start < 0) { 3571 error_setg_errno(errp, -allocation_start, 3572 "Failed to resize refcount structures"); 3573 ret = allocation_start; 3574 goto fail; 3575 } 3576 3577 clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start, 3578 nb_new_data_clusters); 3579 if (clusters_allocated < 0) { 3580 error_setg_errno(errp, -clusters_allocated, 3581 "Failed to allocate data clusters"); 3582 ret = clusters_allocated; 3583 goto fail; 3584 } 3585 3586 assert(clusters_allocated == nb_new_data_clusters); 3587 3588 /* Allocate the data area */ 3589 new_file_size = allocation_start + 3590 nb_new_data_clusters * s->cluster_size; 3591 ret = bdrv_co_truncate(bs->file, new_file_size, prealloc, errp); 3592 if (ret < 0) { 3593 error_prepend(errp, "Failed to resize underlying file: "); 3594 qcow2_free_clusters(bs, allocation_start, 3595 nb_new_data_clusters * s->cluster_size, 3596 QCOW2_DISCARD_OTHER); 3597 goto fail; 3598 } 3599 3600 /* Create the necessary L2 entries */ 3601 host_offset = allocation_start; 3602 guest_offset = old_length; 3603 while (nb_new_data_clusters) { 3604 int64_t nb_clusters = MIN( 3605 nb_new_data_clusters, 3606 s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset)); 3607 QCowL2Meta allocation = { 3608 .offset = guest_offset, 3609 .alloc_offset = host_offset, 3610 .nb_clusters = nb_clusters, 3611 }; 3612 qemu_co_queue_init(&allocation.dependent_requests); 3613 3614 ret = qcow2_alloc_cluster_link_l2(bs, &allocation); 3615 if (ret < 0) { 3616 error_setg_errno(errp, -ret, "Failed to update L2 tables"); 3617 qcow2_free_clusters(bs, host_offset, 3618 nb_new_data_clusters * s->cluster_size, 3619 QCOW2_DISCARD_OTHER); 3620 goto fail; 3621 } 3622 3623 guest_offset += nb_clusters * s->cluster_size; 3624 host_offset += nb_clusters * s->cluster_size; 3625 nb_new_data_clusters -= nb_clusters; 3626 } 3627 break; 3628 } 3629 3630 default: 3631 g_assert_not_reached(); 3632 } 3633 3634 if (prealloc != PREALLOC_MODE_OFF) { 3635 /* Flush metadata before actually changing the image size */ 3636 ret = qcow2_write_caches(bs); 3637 if (ret < 0) { 3638 error_setg_errno(errp, -ret, 3639 "Failed to flush the preallocated area to disk"); 3640 goto fail; 3641 } 3642 } 3643 3644 /* write updated header.size */ 3645 offset = cpu_to_be64(offset); 3646 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), 3647 &offset, sizeof(uint64_t)); 3648 if (ret < 0) { 3649 error_setg_errno(errp, -ret, "Failed to update the image size"); 3650 goto fail; 3651 } 3652 3653 s->l1_vm_state_index = new_l1_size; 3654 ret = 0; 3655 fail: 3656 qemu_co_mutex_unlock(&s->lock); 3657 return ret; 3658 } 3659 3660 /* 3661 * qcow2_compress() 3662 * 3663 * @dest - destination buffer, at least of @size-1 bytes 3664 * @src - source buffer, @size bytes 3665 * 3666 * Returns: compressed size on success 3667 * -1 if compression is inefficient 3668 * -2 on any other error 3669 */ 3670 static ssize_t qcow2_compress(void *dest, const void *src, size_t size) 3671 { 3672 ssize_t ret; 3673 z_stream strm; 3674 3675 /* best compression, small window, no zlib header */ 3676 memset(&strm, 0, sizeof(strm)); 3677 ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 3678 -12, 9, Z_DEFAULT_STRATEGY); 3679 if (ret != 0) { 3680 return -2; 3681 } 3682 3683 /* strm.next_in is not const in old zlib versions, such as those used on 3684 * OpenBSD/NetBSD, so cast the const away */ 3685 strm.avail_in = size; 3686 strm.next_in = (void *) src; 3687 strm.avail_out = size - 1; 3688 strm.next_out = dest; 3689 3690 ret = deflate(&strm, Z_FINISH); 3691 if (ret == Z_STREAM_END) { 3692 ret = size - 1 - strm.avail_out; 3693 } else { 3694 ret = (ret == Z_OK ? -1 : -2); 3695 } 3696 3697 deflateEnd(&strm); 3698 3699 return ret; 3700 } 3701 3702 #define MAX_COMPRESS_THREADS 4 3703 3704 typedef struct Qcow2CompressData { 3705 void *dest; 3706 const void *src; 3707 size_t size; 3708 ssize_t ret; 3709 } Qcow2CompressData; 3710 3711 static int qcow2_compress_pool_func(void *opaque) 3712 { 3713 Qcow2CompressData *data = opaque; 3714 3715 data->ret = qcow2_compress(data->dest, data->src, data->size); 3716 3717 return 0; 3718 } 3719 3720 static void qcow2_compress_complete(void *opaque, int ret) 3721 { 3722 qemu_coroutine_enter(opaque); 3723 } 3724 3725 /* See qcow2_compress definition for parameters description */ 3726 static ssize_t qcow2_co_compress(BlockDriverState *bs, 3727 void *dest, const void *src, size_t size) 3728 { 3729 BDRVQcow2State *s = bs->opaque; 3730 BlockAIOCB *acb; 3731 ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); 3732 Qcow2CompressData arg = { 3733 .dest = dest, 3734 .src = src, 3735 .size = size, 3736 }; 3737 3738 while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) { 3739 qemu_co_queue_wait(&s->compress_wait_queue, NULL); 3740 } 3741 3742 s->nb_compress_threads++; 3743 acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg, 3744 qcow2_compress_complete, 3745 qemu_coroutine_self()); 3746 3747 if (!acb) { 3748 s->nb_compress_threads--; 3749 return -EINVAL; 3750 } 3751 qemu_coroutine_yield(); 3752 s->nb_compress_threads--; 3753 qemu_co_queue_next(&s->compress_wait_queue); 3754 3755 return arg.ret; 3756 } 3757 3758 /* XXX: put compressed sectors first, then all the cluster aligned 3759 tables to avoid losing bytes in alignment */ 3760 static coroutine_fn int 3761 qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, 3762 uint64_t bytes, QEMUIOVector *qiov) 3763 { 3764 BDRVQcow2State *s = bs->opaque; 3765 QEMUIOVector hd_qiov; 3766 struct iovec iov; 3767 int ret; 3768 size_t out_len; 3769 uint8_t *buf, *out_buf; 3770 int64_t cluster_offset; 3771 3772 if (bytes == 0) { 3773 /* align end of file to a sector boundary to ease reading with 3774 sector based I/Os */ 3775 cluster_offset = bdrv_getlength(bs->file->bs); 3776 if (cluster_offset < 0) { 3777 return cluster_offset; 3778 } 3779 return bdrv_co_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, 3780 NULL); 3781 } 3782 3783 if (offset_into_cluster(s, offset)) { 3784 return -EINVAL; 3785 } 3786 3787 buf = qemu_blockalign(bs, s->cluster_size); 3788 if (bytes != s->cluster_size) { 3789 if (bytes > s->cluster_size || 3790 offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS) 3791 { 3792 qemu_vfree(buf); 3793 return -EINVAL; 3794 } 3795 /* Zero-pad last write if image size is not cluster aligned */ 3796 memset(buf + bytes, 0, s->cluster_size - bytes); 3797 } 3798 qemu_iovec_to_buf(qiov, 0, buf, bytes); 3799 3800 out_buf = g_malloc(s->cluster_size); 3801 3802 out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size); 3803 if (out_len == -2) { 3804 ret = -EINVAL; 3805 goto fail; 3806 } else if (out_len == -1) { 3807 /* could not compress: write normal cluster */ 3808 ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0); 3809 if (ret < 0) { 3810 goto fail; 3811 } 3812 goto success; 3813 } 3814 3815 qemu_co_mutex_lock(&s->lock); 3816 cluster_offset = 3817 qcow2_alloc_compressed_cluster_offset(bs, offset, out_len); 3818 if (!cluster_offset) { 3819 qemu_co_mutex_unlock(&s->lock); 3820 ret = -EIO; 3821 goto fail; 3822 } 3823 cluster_offset &= s->cluster_offset_mask; 3824 3825 ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len); 3826 qemu_co_mutex_unlock(&s->lock); 3827 if (ret < 0) { 3828 goto fail; 3829 } 3830 3831 iov = (struct iovec) { 3832 .iov_base = out_buf, 3833 .iov_len = out_len, 3834 }; 3835 qemu_iovec_init_external(&hd_qiov, &iov, 1); 3836 3837 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); 3838 ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0); 3839 if (ret < 0) { 3840 goto fail; 3841 } 3842 success: 3843 ret = 0; 3844 fail: 3845 qemu_vfree(buf); 3846 g_free(out_buf); 3847 return ret; 3848 } 3849 3850 static int make_completely_empty(BlockDriverState *bs) 3851 { 3852 BDRVQcow2State *s = bs->opaque; 3853 Error *local_err = NULL; 3854 int ret, l1_clusters; 3855 int64_t offset; 3856 uint64_t *new_reftable = NULL; 3857 uint64_t rt_entry, l1_size2; 3858 struct { 3859 uint64_t l1_offset; 3860 uint64_t reftable_offset; 3861 uint32_t reftable_clusters; 3862 } QEMU_PACKED l1_ofs_rt_ofs_cls; 3863 3864 ret = qcow2_cache_empty(bs, s->l2_table_cache); 3865 if (ret < 0) { 3866 goto fail; 3867 } 3868 3869 ret = qcow2_cache_empty(bs, s->refcount_block_cache); 3870 if (ret < 0) { 3871 goto fail; 3872 } 3873 3874 /* Refcounts will be broken utterly */ 3875 ret = qcow2_mark_dirty(bs); 3876 if (ret < 0) { 3877 goto fail; 3878 } 3879 3880 BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 3881 3882 l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); 3883 l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t); 3884 3885 /* After this call, neither the in-memory nor the on-disk refcount 3886 * information accurately describe the actual references */ 3887 3888 ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset, 3889 l1_clusters * s->cluster_size, 0); 3890 if (ret < 0) { 3891 goto fail_broken_refcounts; 3892 } 3893 memset(s->l1_table, 0, l1_size2); 3894 3895 BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE); 3896 3897 /* Overwrite enough clusters at the beginning of the sectors to place 3898 * the refcount table, a refcount block and the L1 table in; this may 3899 * overwrite parts of the existing refcount and L1 table, which is not 3900 * an issue because the dirty flag is set, complete data loss is in fact 3901 * desired and partial data loss is consequently fine as well */ 3902 ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size, 3903 (2 + l1_clusters) * s->cluster_size, 0); 3904 /* This call (even if it failed overall) may have overwritten on-disk 3905 * refcount structures; in that case, the in-memory refcount information 3906 * will probably differ from the on-disk information which makes the BDS 3907 * unusable */ 3908 if (ret < 0) { 3909 goto fail_broken_refcounts; 3910 } 3911 3912 BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 3913 BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE); 3914 3915 /* "Create" an empty reftable (one cluster) directly after the image 3916 * header and an empty L1 table three clusters after the image header; 3917 * the cluster between those two will be used as the first refblock */ 3918 l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size); 3919 l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size); 3920 l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1); 3921 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset), 3922 &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls)); 3923 if (ret < 0) { 3924 goto fail_broken_refcounts; 3925 } 3926 3927 s->l1_table_offset = 3 * s->cluster_size; 3928 3929 new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t)); 3930 if (!new_reftable) { 3931 ret = -ENOMEM; 3932 goto fail_broken_refcounts; 3933 } 3934 3935 s->refcount_table_offset = s->cluster_size; 3936 s->refcount_table_size = s->cluster_size / sizeof(uint64_t); 3937 s->max_refcount_table_index = 0; 3938 3939 g_free(s->refcount_table); 3940 s->refcount_table = new_reftable; 3941 new_reftable = NULL; 3942 3943 /* Now the in-memory refcount information again corresponds to the on-disk 3944 * information (reftable is empty and no refblocks (the refblock cache is 3945 * empty)); however, this means some clusters (e.g. the image header) are 3946 * referenced, but not refcounted, but the normal qcow2 code assumes that 3947 * the in-memory information is always correct */ 3948 3949 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); 3950 3951 /* Enter the first refblock into the reftable */ 3952 rt_entry = cpu_to_be64(2 * s->cluster_size); 3953 ret = bdrv_pwrite_sync(bs->file, s->cluster_size, 3954 &rt_entry, sizeof(rt_entry)); 3955 if (ret < 0) { 3956 goto fail_broken_refcounts; 3957 } 3958 s->refcount_table[0] = 2 * s->cluster_size; 3959 3960 s->free_cluster_index = 0; 3961 assert(3 + l1_clusters <= s->refcount_block_size); 3962 offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2); 3963 if (offset < 0) { 3964 ret = offset; 3965 goto fail_broken_refcounts; 3966 } else if (offset > 0) { 3967 error_report("First cluster in emptied image is in use"); 3968 abort(); 3969 } 3970 3971 /* Now finally the in-memory information corresponds to the on-disk 3972 * structures and is correct */ 3973 ret = qcow2_mark_clean(bs); 3974 if (ret < 0) { 3975 goto fail; 3976 } 3977 3978 ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, 3979 PREALLOC_MODE_OFF, &local_err); 3980 if (ret < 0) { 3981 error_report_err(local_err); 3982 goto fail; 3983 } 3984 3985 return 0; 3986 3987 fail_broken_refcounts: 3988 /* The BDS is unusable at this point. If we wanted to make it usable, we 3989 * would have to call qcow2_refcount_close(), qcow2_refcount_init(), 3990 * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init() 3991 * again. However, because the functions which could have caused this error 3992 * path to be taken are used by those functions as well, it's very likely 3993 * that that sequence will fail as well. Therefore, just eject the BDS. */ 3994 bs->drv = NULL; 3995 3996 fail: 3997 g_free(new_reftable); 3998 return ret; 3999 } 4000 4001 static int qcow2_make_empty(BlockDriverState *bs) 4002 { 4003 BDRVQcow2State *s = bs->opaque; 4004 uint64_t offset, end_offset; 4005 int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size); 4006 int l1_clusters, ret = 0; 4007 4008 l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); 4009 4010 if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps && 4011 3 + l1_clusters <= s->refcount_block_size && 4012 s->crypt_method_header != QCOW_CRYPT_LUKS) { 4013 /* The following function only works for qcow2 v3 images (it 4014 * requires the dirty flag) and only as long as there are no 4015 * features that reserve extra clusters (such as snapshots, 4016 * LUKS header, or persistent bitmaps), because it completely 4017 * empties the image. Furthermore, the L1 table and three 4018 * additional clusters (image header, refcount table, one 4019 * refcount block) have to fit inside one refcount block. */ 4020 return make_completely_empty(bs); 4021 } 4022 4023 /* This fallback code simply discards every active cluster; this is slow, 4024 * but works in all cases */ 4025 end_offset = bs->total_sectors * BDRV_SECTOR_SIZE; 4026 for (offset = 0; offset < end_offset; offset += step) { 4027 /* As this function is generally used after committing an external 4028 * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the 4029 * default action for this kind of discard is to pass the discard, 4030 * which will ideally result in an actually smaller image file, as 4031 * is probably desired. */ 4032 ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset), 4033 QCOW2_DISCARD_SNAPSHOT, true); 4034 if (ret < 0) { 4035 break; 4036 } 4037 } 4038 4039 return ret; 4040 } 4041 4042 static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) 4043 { 4044 BDRVQcow2State *s = bs->opaque; 4045 int ret; 4046 4047 qemu_co_mutex_lock(&s->lock); 4048 ret = qcow2_write_caches(bs); 4049 qemu_co_mutex_unlock(&s->lock); 4050 4051 return ret; 4052 } 4053 4054 static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, 4055 Error **errp) 4056 { 4057 Error *local_err = NULL; 4058 BlockMeasureInfo *info; 4059 uint64_t required = 0; /* bytes that contribute to required size */ 4060 uint64_t virtual_size; /* disk size as seen by guest */ 4061 uint64_t refcount_bits; 4062 uint64_t l2_tables; 4063 size_t cluster_size; 4064 int version; 4065 char *optstr; 4066 PreallocMode prealloc; 4067 bool has_backing_file; 4068 4069 /* Parse image creation options */ 4070 cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err); 4071 if (local_err) { 4072 goto err; 4073 } 4074 4075 version = qcow2_opt_get_version_del(opts, &local_err); 4076 if (local_err) { 4077 goto err; 4078 } 4079 4080 refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err); 4081 if (local_err) { 4082 goto err; 4083 } 4084 4085 optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 4086 prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr, 4087 PREALLOC_MODE_OFF, &local_err); 4088 g_free(optstr); 4089 if (local_err) { 4090 goto err; 4091 } 4092 4093 optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); 4094 has_backing_file = !!optstr; 4095 g_free(optstr); 4096 4097 virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); 4098 virtual_size = ROUND_UP(virtual_size, cluster_size); 4099 4100 /* Check that virtual disk size is valid */ 4101 l2_tables = DIV_ROUND_UP(virtual_size / cluster_size, 4102 cluster_size / sizeof(uint64_t)); 4103 if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) { 4104 error_setg(&local_err, "The image size is too large " 4105 "(try using a larger cluster size)"); 4106 goto err; 4107 } 4108 4109 /* Account for input image */ 4110 if (in_bs) { 4111 int64_t ssize = bdrv_getlength(in_bs); 4112 if (ssize < 0) { 4113 error_setg_errno(&local_err, -ssize, 4114 "Unable to get image virtual_size"); 4115 goto err; 4116 } 4117 4118 virtual_size = ROUND_UP(ssize, cluster_size); 4119 4120 if (has_backing_file) { 4121 /* We don't how much of the backing chain is shared by the input 4122 * image and the new image file. In the worst case the new image's 4123 * backing file has nothing in common with the input image. Be 4124 * conservative and assume all clusters need to be written. 4125 */ 4126 required = virtual_size; 4127 } else { 4128 int64_t offset; 4129 int64_t pnum = 0; 4130 4131 for (offset = 0; offset < ssize; offset += pnum) { 4132 int ret; 4133 4134 ret = bdrv_block_status_above(in_bs, NULL, offset, 4135 ssize - offset, &pnum, NULL, 4136 NULL); 4137 if (ret < 0) { 4138 error_setg_errno(&local_err, -ret, 4139 "Unable to get block status"); 4140 goto err; 4141 } 4142 4143 if (ret & BDRV_BLOCK_ZERO) { 4144 /* Skip zero regions (safe with no backing file) */ 4145 } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) == 4146 (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) { 4147 /* Extend pnum to end of cluster for next iteration */ 4148 pnum = ROUND_UP(offset + pnum, cluster_size) - offset; 4149 4150 /* Count clusters we've seen */ 4151 required += offset % cluster_size + pnum; 4152 } 4153 } 4154 } 4155 } 4156 4157 /* Take into account preallocation. Nothing special is needed for 4158 * PREALLOC_MODE_METADATA since metadata is always counted. 4159 */ 4160 if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { 4161 required = virtual_size; 4162 } 4163 4164 info = g_new(BlockMeasureInfo, 1); 4165 info->fully_allocated = 4166 qcow2_calc_prealloc_size(virtual_size, cluster_size, 4167 ctz32(refcount_bits)); 4168 4169 /* Remove data clusters that are not required. This overestimates the 4170 * required size because metadata needed for the fully allocated file is 4171 * still counted. 4172 */ 4173 info->required = info->fully_allocated - virtual_size + required; 4174 return info; 4175 4176 err: 4177 error_propagate(errp, local_err); 4178 return NULL; 4179 } 4180 4181 static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 4182 { 4183 BDRVQcow2State *s = bs->opaque; 4184 bdi->unallocated_blocks_are_zero = true; 4185 bdi->cluster_size = s->cluster_size; 4186 bdi->vm_state_offset = qcow2_vm_state_offset(s); 4187 return 0; 4188 } 4189 4190 static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) 4191 { 4192 BDRVQcow2State *s = bs->opaque; 4193 ImageInfoSpecific *spec_info; 4194 QCryptoBlockInfo *encrypt_info = NULL; 4195 4196 if (s->crypto != NULL) { 4197 encrypt_info = qcrypto_block_get_info(s->crypto, &error_abort); 4198 } 4199 4200 spec_info = g_new(ImageInfoSpecific, 1); 4201 *spec_info = (ImageInfoSpecific){ 4202 .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, 4203 .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1), 4204 }; 4205 if (s->qcow_version == 2) { 4206 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ 4207 .compat = g_strdup("0.10"), 4208 .refcount_bits = s->refcount_bits, 4209 }; 4210 } else if (s->qcow_version == 3) { 4211 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ 4212 .compat = g_strdup("1.1"), 4213 .lazy_refcounts = s->compatible_features & 4214 QCOW2_COMPAT_LAZY_REFCOUNTS, 4215 .has_lazy_refcounts = true, 4216 .corrupt = s->incompatible_features & 4217 QCOW2_INCOMPAT_CORRUPT, 4218 .has_corrupt = true, 4219 .refcount_bits = s->refcount_bits, 4220 }; 4221 } else { 4222 /* if this assertion fails, this probably means a new version was 4223 * added without having it covered here */ 4224 assert(false); 4225 } 4226 4227 if (encrypt_info) { 4228 ImageInfoSpecificQCow2Encryption *qencrypt = 4229 g_new(ImageInfoSpecificQCow2Encryption, 1); 4230 switch (encrypt_info->format) { 4231 case Q_CRYPTO_BLOCK_FORMAT_QCOW: 4232 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES; 4233 break; 4234 case Q_CRYPTO_BLOCK_FORMAT_LUKS: 4235 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS; 4236 qencrypt->u.luks = encrypt_info->u.luks; 4237 break; 4238 default: 4239 abort(); 4240 } 4241 /* Since we did shallow copy above, erase any pointers 4242 * in the original info */ 4243 memset(&encrypt_info->u, 0, sizeof(encrypt_info->u)); 4244 qapi_free_QCryptoBlockInfo(encrypt_info); 4245 4246 spec_info->u.qcow2.data->has_encrypt = true; 4247 spec_info->u.qcow2.data->encrypt = qencrypt; 4248 } 4249 4250 return spec_info; 4251 } 4252 4253 static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 4254 int64_t pos) 4255 { 4256 BDRVQcow2State *s = bs->opaque; 4257 4258 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); 4259 return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos, 4260 qiov->size, qiov, 0); 4261 } 4262 4263 static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 4264 int64_t pos) 4265 { 4266 BDRVQcow2State *s = bs->opaque; 4267 4268 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); 4269 return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos, 4270 qiov->size, qiov, 0); 4271 } 4272 4273 /* 4274 * Downgrades an image's version. To achieve this, any incompatible features 4275 * have to be removed. 4276 */ 4277 static int qcow2_downgrade(BlockDriverState *bs, int target_version, 4278 BlockDriverAmendStatusCB *status_cb, void *cb_opaque, 4279 Error **errp) 4280 { 4281 BDRVQcow2State *s = bs->opaque; 4282 int current_version = s->qcow_version; 4283 int ret; 4284 4285 /* This is qcow2_downgrade(), not qcow2_upgrade() */ 4286 assert(target_version < current_version); 4287 4288 /* There are no other versions (now) that you can downgrade to */ 4289 assert(target_version == 2); 4290 4291 if (s->refcount_order != 4) { 4292 error_setg(errp, "compat=0.10 requires refcount_bits=16"); 4293 return -ENOTSUP; 4294 } 4295 4296 /* clear incompatible features */ 4297 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 4298 ret = qcow2_mark_clean(bs); 4299 if (ret < 0) { 4300 error_setg_errno(errp, -ret, "Failed to make the image clean"); 4301 return ret; 4302 } 4303 } 4304 4305 /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in 4306 * the first place; if that happens nonetheless, returning -ENOTSUP is the 4307 * best thing to do anyway */ 4308 4309 if (s->incompatible_features) { 4310 error_setg(errp, "Cannot downgrade an image with incompatible features " 4311 "%#" PRIx64 " set", s->incompatible_features); 4312 return -ENOTSUP; 4313 } 4314 4315 /* since we can ignore compatible features, we can set them to 0 as well */ 4316 s->compatible_features = 0; 4317 /* if lazy refcounts have been used, they have already been fixed through 4318 * clearing the dirty flag */ 4319 4320 /* clearing autoclear features is trivial */ 4321 s->autoclear_features = 0; 4322 4323 ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque); 4324 if (ret < 0) { 4325 error_setg_errno(errp, -ret, "Failed to turn zero into data clusters"); 4326 return ret; 4327 } 4328 4329 s->qcow_version = target_version; 4330 ret = qcow2_update_header(bs); 4331 if (ret < 0) { 4332 s->qcow_version = current_version; 4333 error_setg_errno(errp, -ret, "Failed to update the image header"); 4334 return ret; 4335 } 4336 return 0; 4337 } 4338 4339 typedef enum Qcow2AmendOperation { 4340 /* This is the value Qcow2AmendHelperCBInfo::last_operation will be 4341 * statically initialized to so that the helper CB can discern the first 4342 * invocation from an operation change */ 4343 QCOW2_NO_OPERATION = 0, 4344 4345 QCOW2_CHANGING_REFCOUNT_ORDER, 4346 QCOW2_DOWNGRADING, 4347 } Qcow2AmendOperation; 4348 4349 typedef struct Qcow2AmendHelperCBInfo { 4350 /* The code coordinating the amend operations should only modify 4351 * these four fields; the rest will be managed by the CB */ 4352 BlockDriverAmendStatusCB *original_status_cb; 4353 void *original_cb_opaque; 4354 4355 Qcow2AmendOperation current_operation; 4356 4357 /* Total number of operations to perform (only set once) */ 4358 int total_operations; 4359 4360 /* The following fields are managed by the CB */ 4361 4362 /* Number of operations completed */ 4363 int operations_completed; 4364 4365 /* Cumulative offset of all completed operations */ 4366 int64_t offset_completed; 4367 4368 Qcow2AmendOperation last_operation; 4369 int64_t last_work_size; 4370 } Qcow2AmendHelperCBInfo; 4371 4372 static void qcow2_amend_helper_cb(BlockDriverState *bs, 4373 int64_t operation_offset, 4374 int64_t operation_work_size, void *opaque) 4375 { 4376 Qcow2AmendHelperCBInfo *info = opaque; 4377 int64_t current_work_size; 4378 int64_t projected_work_size; 4379 4380 if (info->current_operation != info->last_operation) { 4381 if (info->last_operation != QCOW2_NO_OPERATION) { 4382 info->offset_completed += info->last_work_size; 4383 info->operations_completed++; 4384 } 4385 4386 info->last_operation = info->current_operation; 4387 } 4388 4389 assert(info->total_operations > 0); 4390 assert(info->operations_completed < info->total_operations); 4391 4392 info->last_work_size = operation_work_size; 4393 4394 current_work_size = info->offset_completed + operation_work_size; 4395 4396 /* current_work_size is the total work size for (operations_completed + 1) 4397 * operations (which includes this one), so multiply it by the number of 4398 * operations not covered and divide it by the number of operations 4399 * covered to get a projection for the operations not covered */ 4400 projected_work_size = current_work_size * (info->total_operations - 4401 info->operations_completed - 1) 4402 / (info->operations_completed + 1); 4403 4404 info->original_status_cb(bs, info->offset_completed + operation_offset, 4405 current_work_size + projected_work_size, 4406 info->original_cb_opaque); 4407 } 4408 4409 static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, 4410 BlockDriverAmendStatusCB *status_cb, 4411 void *cb_opaque, 4412 Error **errp) 4413 { 4414 BDRVQcow2State *s = bs->opaque; 4415 int old_version = s->qcow_version, new_version = old_version; 4416 uint64_t new_size = 0; 4417 const char *backing_file = NULL, *backing_format = NULL; 4418 bool lazy_refcounts = s->use_lazy_refcounts; 4419 const char *compat = NULL; 4420 uint64_t cluster_size = s->cluster_size; 4421 bool encrypt; 4422 int encformat; 4423 int refcount_bits = s->refcount_bits; 4424 int ret; 4425 QemuOptDesc *desc = opts->list->desc; 4426 Qcow2AmendHelperCBInfo helper_cb_info; 4427 4428 while (desc && desc->name) { 4429 if (!qemu_opt_find(opts, desc->name)) { 4430 /* only change explicitly defined options */ 4431 desc++; 4432 continue; 4433 } 4434 4435 if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) { 4436 compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL); 4437 if (!compat) { 4438 /* preserve default */ 4439 } else if (!strcmp(compat, "0.10")) { 4440 new_version = 2; 4441 } else if (!strcmp(compat, "1.1")) { 4442 new_version = 3; 4443 } else { 4444 error_setg(errp, "Unknown compatibility level %s", compat); 4445 return -EINVAL; 4446 } 4447 } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) { 4448 error_setg(errp, "Cannot change preallocation mode"); 4449 return -ENOTSUP; 4450 } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { 4451 new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 4452 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) { 4453 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 4454 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) { 4455 backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 4456 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) { 4457 encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, 4458 !!s->crypto); 4459 4460 if (encrypt != !!s->crypto) { 4461 error_setg(errp, 4462 "Changing the encryption flag is not supported"); 4463 return -ENOTSUP; 4464 } 4465 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) { 4466 encformat = qcow2_crypt_method_from_format( 4467 qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT)); 4468 4469 if (encformat != s->crypt_method_header) { 4470 error_setg(errp, 4471 "Changing the encryption format is not supported"); 4472 return -ENOTSUP; 4473 } 4474 } else if (g_str_has_prefix(desc->name, "encrypt.")) { 4475 error_setg(errp, 4476 "Changing the encryption parameters is not supported"); 4477 return -ENOTSUP; 4478 } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { 4479 cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 4480 cluster_size); 4481 if (cluster_size != s->cluster_size) { 4482 error_setg(errp, "Changing the cluster size is not supported"); 4483 return -ENOTSUP; 4484 } 4485 } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { 4486 lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, 4487 lazy_refcounts); 4488 } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { 4489 refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS, 4490 refcount_bits); 4491 4492 if (refcount_bits <= 0 || refcount_bits > 64 || 4493 !is_power_of_2(refcount_bits)) 4494 { 4495 error_setg(errp, "Refcount width must be a power of two and " 4496 "may not exceed 64 bits"); 4497 return -EINVAL; 4498 } 4499 } else { 4500 /* if this point is reached, this probably means a new option was 4501 * added without having it covered here */ 4502 abort(); 4503 } 4504 4505 desc++; 4506 } 4507 4508 helper_cb_info = (Qcow2AmendHelperCBInfo){ 4509 .original_status_cb = status_cb, 4510 .original_cb_opaque = cb_opaque, 4511 .total_operations = (new_version < old_version) 4512 + (s->refcount_bits != refcount_bits) 4513 }; 4514 4515 /* Upgrade first (some features may require compat=1.1) */ 4516 if (new_version > old_version) { 4517 s->qcow_version = new_version; 4518 ret = qcow2_update_header(bs); 4519 if (ret < 0) { 4520 s->qcow_version = old_version; 4521 error_setg_errno(errp, -ret, "Failed to update the image header"); 4522 return ret; 4523 } 4524 } 4525 4526 if (s->refcount_bits != refcount_bits) { 4527 int refcount_order = ctz32(refcount_bits); 4528 4529 if (new_version < 3 && refcount_bits != 16) { 4530 error_setg(errp, "Refcount widths other than 16 bits require " 4531 "compatibility level 1.1 or above (use compat=1.1 or " 4532 "greater)"); 4533 return -EINVAL; 4534 } 4535 4536 helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER; 4537 ret = qcow2_change_refcount_order(bs, refcount_order, 4538 &qcow2_amend_helper_cb, 4539 &helper_cb_info, errp); 4540 if (ret < 0) { 4541 return ret; 4542 } 4543 } 4544 4545 if (backing_file || backing_format) { 4546 ret = qcow2_change_backing_file(bs, 4547 backing_file ?: s->image_backing_file, 4548 backing_format ?: s->image_backing_format); 4549 if (ret < 0) { 4550 error_setg_errno(errp, -ret, "Failed to change the backing file"); 4551 return ret; 4552 } 4553 } 4554 4555 if (s->use_lazy_refcounts != lazy_refcounts) { 4556 if (lazy_refcounts) { 4557 if (new_version < 3) { 4558 error_setg(errp, "Lazy refcounts only supported with " 4559 "compatibility level 1.1 and above (use compat=1.1 " 4560 "or greater)"); 4561 return -EINVAL; 4562 } 4563 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 4564 ret = qcow2_update_header(bs); 4565 if (ret < 0) { 4566 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 4567 error_setg_errno(errp, -ret, "Failed to update the image header"); 4568 return ret; 4569 } 4570 s->use_lazy_refcounts = true; 4571 } else { 4572 /* make image clean first */ 4573 ret = qcow2_mark_clean(bs); 4574 if (ret < 0) { 4575 error_setg_errno(errp, -ret, "Failed to make the image clean"); 4576 return ret; 4577 } 4578 /* now disallow lazy refcounts */ 4579 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 4580 ret = qcow2_update_header(bs); 4581 if (ret < 0) { 4582 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 4583 error_setg_errno(errp, -ret, "Failed to update the image header"); 4584 return ret; 4585 } 4586 s->use_lazy_refcounts = false; 4587 } 4588 } 4589 4590 if (new_size) { 4591 BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); 4592 ret = blk_insert_bs(blk, bs, errp); 4593 if (ret < 0) { 4594 blk_unref(blk); 4595 return ret; 4596 } 4597 4598 ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, errp); 4599 blk_unref(blk); 4600 if (ret < 0) { 4601 return ret; 4602 } 4603 } 4604 4605 /* Downgrade last (so unsupported features can be removed before) */ 4606 if (new_version < old_version) { 4607 helper_cb_info.current_operation = QCOW2_DOWNGRADING; 4608 ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb, 4609 &helper_cb_info, errp); 4610 if (ret < 0) { 4611 return ret; 4612 } 4613 } 4614 4615 return 0; 4616 } 4617 4618 /* 4619 * If offset or size are negative, respectively, they will not be included in 4620 * the BLOCK_IMAGE_CORRUPTED event emitted. 4621 * fatal will be ignored for read-only BDS; corruptions found there will always 4622 * be considered non-fatal. 4623 */ 4624 void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, 4625 int64_t size, const char *message_format, ...) 4626 { 4627 BDRVQcow2State *s = bs->opaque; 4628 const char *node_name; 4629 char *message; 4630 va_list ap; 4631 4632 fatal = fatal && bdrv_is_writable(bs); 4633 4634 if (s->signaled_corruption && 4635 (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT))) 4636 { 4637 return; 4638 } 4639 4640 va_start(ap, message_format); 4641 message = g_strdup_vprintf(message_format, ap); 4642 va_end(ap); 4643 4644 if (fatal) { 4645 fprintf(stderr, "qcow2: Marking image as corrupt: %s; further " 4646 "corruption events will be suppressed\n", message); 4647 } else { 4648 fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal " 4649 "corruption events will be suppressed\n", message); 4650 } 4651 4652 node_name = bdrv_get_node_name(bs); 4653 qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), 4654 *node_name != '\0', node_name, 4655 message, offset >= 0, offset, 4656 size >= 0, size, 4657 fatal, &error_abort); 4658 g_free(message); 4659 4660 if (fatal) { 4661 qcow2_mark_corrupt(bs); 4662 bs->drv = NULL; /* make BDS unusable */ 4663 } 4664 4665 s->signaled_corruption = true; 4666 } 4667 4668 static QemuOptsList qcow2_create_opts = { 4669 .name = "qcow2-create-opts", 4670 .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head), 4671 .desc = { 4672 { 4673 .name = BLOCK_OPT_SIZE, 4674 .type = QEMU_OPT_SIZE, 4675 .help = "Virtual disk size" 4676 }, 4677 { 4678 .name = BLOCK_OPT_COMPAT_LEVEL, 4679 .type = QEMU_OPT_STRING, 4680 .help = "Compatibility level (0.10 or 1.1)" 4681 }, 4682 { 4683 .name = BLOCK_OPT_BACKING_FILE, 4684 .type = QEMU_OPT_STRING, 4685 .help = "File name of a base image" 4686 }, 4687 { 4688 .name = BLOCK_OPT_BACKING_FMT, 4689 .type = QEMU_OPT_STRING, 4690 .help = "Image format of the base image" 4691 }, 4692 { 4693 .name = BLOCK_OPT_ENCRYPT, 4694 .type = QEMU_OPT_BOOL, 4695 .help = "Encrypt the image with format 'aes'. (Deprecated " 4696 "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", 4697 }, 4698 { 4699 .name = BLOCK_OPT_ENCRYPT_FORMAT, 4700 .type = QEMU_OPT_STRING, 4701 .help = "Encrypt the image, format choices: 'aes', 'luks'", 4702 }, 4703 BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", 4704 "ID of secret providing qcow AES key or LUKS passphrase"), 4705 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."), 4706 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."), 4707 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."), 4708 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."), 4709 BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."), 4710 BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."), 4711 { 4712 .name = BLOCK_OPT_CLUSTER_SIZE, 4713 .type = QEMU_OPT_SIZE, 4714 .help = "qcow2 cluster size", 4715 .def_value_str = stringify(DEFAULT_CLUSTER_SIZE) 4716 }, 4717 { 4718 .name = BLOCK_OPT_PREALLOC, 4719 .type = QEMU_OPT_STRING, 4720 .help = "Preallocation mode (allowed values: off, metadata, " 4721 "falloc, full)" 4722 }, 4723 { 4724 .name = BLOCK_OPT_LAZY_REFCOUNTS, 4725 .type = QEMU_OPT_BOOL, 4726 .help = "Postpone refcount updates", 4727 .def_value_str = "off" 4728 }, 4729 { 4730 .name = BLOCK_OPT_REFCOUNT_BITS, 4731 .type = QEMU_OPT_NUMBER, 4732 .help = "Width of a reference count entry in bits", 4733 .def_value_str = "16" 4734 }, 4735 { /* end of list */ } 4736 } 4737 }; 4738 4739 BlockDriver bdrv_qcow2 = { 4740 .format_name = "qcow2", 4741 .instance_size = sizeof(BDRVQcow2State), 4742 .bdrv_probe = qcow2_probe, 4743 .bdrv_open = qcow2_open, 4744 .bdrv_close = qcow2_close, 4745 .bdrv_reopen_prepare = qcow2_reopen_prepare, 4746 .bdrv_reopen_commit = qcow2_reopen_commit, 4747 .bdrv_reopen_abort = qcow2_reopen_abort, 4748 .bdrv_join_options = qcow2_join_options, 4749 .bdrv_child_perm = bdrv_format_default_perms, 4750 .bdrv_co_create_opts = qcow2_co_create_opts, 4751 .bdrv_co_create = qcow2_co_create, 4752 .bdrv_has_zero_init = bdrv_has_zero_init_1, 4753 .bdrv_co_block_status = qcow2_co_block_status, 4754 4755 .bdrv_co_preadv = qcow2_co_preadv, 4756 .bdrv_co_pwritev = qcow2_co_pwritev, 4757 .bdrv_co_flush_to_os = qcow2_co_flush_to_os, 4758 4759 .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, 4760 .bdrv_co_pdiscard = qcow2_co_pdiscard, 4761 .bdrv_co_copy_range_from = qcow2_co_copy_range_from, 4762 .bdrv_co_copy_range_to = qcow2_co_copy_range_to, 4763 .bdrv_co_truncate = qcow2_co_truncate, 4764 .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed, 4765 .bdrv_make_empty = qcow2_make_empty, 4766 4767 .bdrv_snapshot_create = qcow2_snapshot_create, 4768 .bdrv_snapshot_goto = qcow2_snapshot_goto, 4769 .bdrv_snapshot_delete = qcow2_snapshot_delete, 4770 .bdrv_snapshot_list = qcow2_snapshot_list, 4771 .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, 4772 .bdrv_measure = qcow2_measure, 4773 .bdrv_get_info = qcow2_get_info, 4774 .bdrv_get_specific_info = qcow2_get_specific_info, 4775 4776 .bdrv_save_vmstate = qcow2_save_vmstate, 4777 .bdrv_load_vmstate = qcow2_load_vmstate, 4778 4779 .supports_backing = true, 4780 .bdrv_change_backing_file = qcow2_change_backing_file, 4781 4782 .bdrv_refresh_limits = qcow2_refresh_limits, 4783 .bdrv_co_invalidate_cache = qcow2_co_invalidate_cache, 4784 .bdrv_inactivate = qcow2_inactivate, 4785 4786 .create_opts = &qcow2_create_opts, 4787 .bdrv_co_check = qcow2_co_check, 4788 .bdrv_amend_options = qcow2_amend_options, 4789 4790 .bdrv_detach_aio_context = qcow2_detach_aio_context, 4791 .bdrv_attach_aio_context = qcow2_attach_aio_context, 4792 4793 .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw, 4794 .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap, 4795 .bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap, 4796 }; 4797 4798 static void bdrv_qcow2_init(void) 4799 { 4800 bdrv_register(&bdrv_qcow2); 4801 } 4802 4803 block_init(bdrv_qcow2_init); 4804