/*
 * Block driver for the QCOW version 2 format
 *
 * Copyright (c) 2004-2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

#define ZLIB_CONST
#include <zlib.h>

#include "block/block_int.h"
#include "block/qdict.h"
#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "qcow2.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-block-core.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qstring.h"
#include "trace.h"
#include "qemu/option_int.h"
#include "qemu/cutils.h"
#include "qemu/bswap.h"
#include "qapi/qobject-input-visitor.h"
#include "qapi/qapi-visit-block-core.h"
#include "crypto.h"
#include "block/thread-pool.h"

/*
  Differences with QCOW:

  - Support for multiple incremental snapshots.
  - Memory management by reference counts.
  - Clusters which have a reference count of one have the bit
    QCOW_OFLAG_COPIED to optimize write performance.
  - Size of compressed clusters is stored in sectors to reduce bit usage
    in the cluster offsets.
  - Support for storing additional data (such as the VM state) in the
    snapshots.
  - If a backing store is used, the cluster size is not constrained
    (could be backported to QCOW).
  - L2 tables always have a size of one cluster.
*/


typedef struct {
    uint32_t magic;
    uint32_t len;
} QEMU_PACKED QCowExtension;

#define QCOW2_EXT_MAGIC_END 0
#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
#define QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
#define QCOW2_EXT_MAGIC_BITMAPS 0x23852875

static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    const QCowHeader *cow_header = (const void *)buf;

    if (buf_size >= sizeof(QCowHeader) &&
        be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
        be32_to_cpu(cow_header->version) >= 2)
        return 100;
    else
        return 0;
}


static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
                                          uint8_t *buf, size_t buflen,
                                          void *opaque, Error **errp)
{
    BlockDriverState *bs = opaque;
    BDRVQcow2State *s = bs->opaque;
    ssize_t ret;

    if ((offset + buflen) > s->crypto_header.length) {
        error_setg(errp, "Request for data outside of extension header");
        return -1;
    }

    ret = bdrv_pread(bs->file,
                     s->crypto_header.offset + offset, buf, buflen);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read encryption header");
        return -1;
    }
    return ret;
}


static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
                                          void *opaque, Error **errp)
{
    BlockDriverState *bs = opaque;
    BDRVQcow2State *s = bs->opaque;
    int64_t ret;
    int64_t clusterlen;

    ret = qcow2_alloc_clusters(bs, headerlen);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "Cannot allocate cluster for LUKS header size %zu",
                         headerlen);
        return -1;
    }

    s->crypto_header.length = headerlen;
    s->crypto_header.offset = ret;

    /* Zero fill remaining space in cluster so it has predictable
     * content in case of future spec changes */
    clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
    assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen) == 0);
    ret = bdrv_pwrite_zeroes(bs->file,
                             ret + headerlen,
                             clusterlen - headerlen, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not zero fill encryption header");
        return -1;
    }

    return ret;
}


static ssize_t qcow2_crypto_hdr_write_func(QCryptoBlock *block, size_t offset,
                                           const uint8_t *buf, size_t buflen,
                                           void *opaque, Error **errp)
{
    BlockDriverState *bs = opaque;
    BDRVQcow2State *s = bs->opaque;
    ssize_t ret;

    if ((offset + buflen) > s->crypto_header.length) {
        error_setg(errp, "Request for data outside of extension header");
        return -1;
    }

    ret = bdrv_pwrite(bs->file,
                      s->crypto_header.offset + offset, buf, buflen);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write encryption header");
        return -1;
    }
    return ret;
}


/*
 * read qcow2 extension and fill bs
 * start reading from start_offset
 * finish reading upon magic of value 0 or when end_offset reached
 * unknown magic is skipped (future extension this version knows nothing about)
 * return 0 upon success, non-0 otherwise
 */
static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                                 uint64_t end_offset, void **p_feature_table,
                                 int flags, bool *need_update_header,
                                 Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    QCowExtension ext;
    uint64_t offset;
    int ret;
    Qcow2BitmapHeaderExt bitmaps_ext;

    if (need_update_header != NULL) {
        *need_update_header = false;
    }
#ifdef DEBUG_EXT
    printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
#endif
    offset = start_offset;
    while (offset < end_offset) {

#ifdef DEBUG_EXT
        /* Sanity check */
        if (offset > s->cluster_size)
            printf("qcow2_read_extension: suspicious offset %lu\n", offset);

        printf("attempting to read extended header in offset %lu\n", offset);
#endif

        ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
                             "pread fail from offset %" PRIu64, offset);
            return 1;
        }
        be32_to_cpus(&ext.magic);
        be32_to_cpus(&ext.len);
        offset += sizeof(ext);
#ifdef DEBUG_EXT
        printf("ext.magic = 0x%x\n", ext.magic);
#endif
        if (offset > end_offset || ext.len > end_offset - offset) {
            error_setg(errp, "Header extension too large");
            return -EINVAL;
        }

        switch (ext.magic) {
        case QCOW2_EXT_MAGIC_END:
            return 0;

        case QCOW2_EXT_MAGIC_BACKING_FORMAT:
            if (ext.len >= sizeof(bs->backing_format)) {
                error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
                           " too large (>=%zu)", ext.len,
                           sizeof(bs->backing_format));
                return 2;
            }
            ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
                                 "Could not read format name");
                return 3;
            }
            bs->backing_format[ext.len] = '\0';
            s->image_backing_format = g_strdup(bs->backing_format);
#ifdef DEBUG_EXT
            printf("Qcow2: Got format extension %s\n", bs->backing_format);
#endif
            break;

        case QCOW2_EXT_MAGIC_FEATURE_TABLE:
            if (p_feature_table != NULL) {
                void *feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
                ret = bdrv_pread(bs->file, offset, feature_table, ext.len);
                if (ret < 0) {
                    error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
                                     "Could not read table");
                    return ret;
                }

                *p_feature_table = feature_table;
            }
            break;

        case QCOW2_EXT_MAGIC_CRYPTO_HEADER: {
            unsigned int cflags = 0;
            if (s->crypt_method_header != QCOW_CRYPT_LUKS) {
                error_setg(errp, "CRYPTO header extension only "
                           "expected with LUKS encryption method");
                return -EINVAL;
            }
            if (ext.len != sizeof(Qcow2CryptoHeaderExtension)) {
                error_setg(errp, "CRYPTO header extension size %u, "
                           "but expected size %zu", ext.len,
                           sizeof(Qcow2CryptoHeaderExtension));
                return -EINVAL;
            }

            ret = bdrv_pread(bs->file, offset, &s->crypto_header, ext.len);
            if (ret < 0) {
                error_setg_errno(errp, -ret,
                                 "Unable to read CRYPTO header extension");
                return ret;
            }
            be64_to_cpus(&s->crypto_header.offset);
            be64_to_cpus(&s->crypto_header.length);

            if ((s->crypto_header.offset % s->cluster_size) != 0) {
                error_setg(errp, "Encryption header offset '%" PRIu64 "' is "
                           "not a multiple of cluster size '%u'",
                           s->crypto_header.offset, s->cluster_size);
                return -EINVAL;
            }

            if (flags & BDRV_O_NO_IO) {
                cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
            }
            s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
                                           qcow2_crypto_hdr_read_func,
                                           bs, cflags, errp);
            if (!s->crypto) {
                return -EINVAL;
            }
        }   break;

        case QCOW2_EXT_MAGIC_BITMAPS:
            if (ext.len != sizeof(bitmaps_ext)) {
                error_setg(errp, "bitmaps_ext: "
                           "Invalid extension length");
                return -EINVAL;
            }

            if (!(s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) {
                if (s->qcow_version < 3) {
                    /* Let's be a bit more specific */
                    warn_report("This qcow2 v2 image contains bitmaps, but "
                                "they may have been modified by a program "
                                "without persistent bitmap support; so now "
                                "they must all be considered inconsistent");
                } else {
                    warn_report("a program lacking bitmap support "
                                "modified this file, so all bitmaps are now "
                                "considered inconsistent");
                }
                error_printf("Some clusters may be leaked, "
                             "run 'qemu-img check -r' on the image "
                             "file to fix.");
                if (need_update_header != NULL) {
                    /* Updating is needed to drop invalid bitmap extension. */
                    *need_update_header = true;
                }
                break;
            }

            ret = bdrv_pread(bs->file, offset, &bitmaps_ext, ext.len);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "bitmaps_ext: "
                                 "Could not read ext header");
                return ret;
            }

            if (bitmaps_ext.reserved32 != 0) {
                error_setg(errp, "bitmaps_ext: "
                           "Reserved field is not zero");
                return -EINVAL;
            }

            be32_to_cpus(&bitmaps_ext.nb_bitmaps);
            be64_to_cpus(&bitmaps_ext.bitmap_directory_size);
            be64_to_cpus(&bitmaps_ext.bitmap_directory_offset);

            if (bitmaps_ext.nb_bitmaps > QCOW2_MAX_BITMAPS) {
                error_setg(errp,
                           "bitmaps_ext: Image has %" PRIu32 " bitmaps, "
                           "exceeding the QEMU supported maximum of %d",
                           bitmaps_ext.nb_bitmaps, QCOW2_MAX_BITMAPS);
                return -EINVAL;
            }

            if (bitmaps_ext.nb_bitmaps == 0) {
                error_setg(errp, "found bitmaps extension with zero bitmaps");
                return -EINVAL;
            }

            if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) {
                error_setg(errp, "bitmaps_ext: "
                           "invalid bitmap directory offset");
                return -EINVAL;
            }

            if (bitmaps_ext.bitmap_directory_size >
                QCOW2_MAX_BITMAP_DIRECTORY_SIZE) {
                error_setg(errp, "bitmaps_ext: "
                           "bitmap directory size (%" PRIu64 ") exceeds "
                           "the maximum supported size (%d)",
                           bitmaps_ext.bitmap_directory_size,
                           QCOW2_MAX_BITMAP_DIRECTORY_SIZE);
                return -EINVAL;
            }

            s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
            s->bitmap_directory_offset =
                    bitmaps_ext.bitmap_directory_offset;
            s->bitmap_directory_size =
                    bitmaps_ext.bitmap_directory_size;

#ifdef DEBUG_EXT
            printf("Qcow2: Got bitmaps extension: "
                   "offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
                   s->bitmap_directory_offset, s->nb_bitmaps);
#endif
            break;

        default:
            /* unknown magic - save it in case we need to rewrite the header */
            /* If you add a new feature, make sure to also update the fast
             * path of qcow2_make_empty() to deal with it. */
            {
                Qcow2UnknownHeaderExtension *uext;

                uext = g_malloc0(sizeof(*uext) + ext.len);
                uext->magic = ext.magic;
                uext->len = ext.len;
                QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);

                ret = bdrv_pread(bs->file, offset, uext->data, uext->len);
                if (ret < 0) {
                    error_setg_errno(errp, -ret, "ERROR: unknown extension: "
                                     "Could not read data");
                    return ret;
                }
            }
            break;
        }

        offset += ((ext.len + 7) & ~7);
    }

    return 0;
}

static void cleanup_unknown_header_ext(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    Qcow2UnknownHeaderExtension *uext, *next;

    QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
        QLIST_REMOVE(uext, next);
        g_free(uext);
    }
}

static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
                                       uint64_t mask)
{
    char *features = g_strdup("");
    char *old;

    while (table && table->name[0] != '\0') {
        if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
            if (mask & (1ULL << table->bit)) {
                old = features;
                features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "",
                                           table->name);
                g_free(old);
                mask &= ~(1ULL << table->bit);
            }
        }
        table++;
    }

    if (mask) {
        old = features;
        features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64,
                                   old, *old ? ", " : "", mask);
        g_free(old);
    }

    error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
    g_free(features);
}

/*
 * Sets the dirty bit and flushes afterwards if necessary.
 *
 * The incompatible_features bit is only set if the image file header was
 * updated successfully.  Therefore it is not required to check the return
 * value of this function.
 */
int qcow2_mark_dirty(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t val;
    int ret;

    assert(s->qcow_version >= 3);

    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
        return 0; /* already dirty */
    }

    val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
    ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
                      &val, sizeof(val));
    if (ret < 0) {
        return ret;
    }
    ret = bdrv_flush(bs->file->bs);
    if (ret < 0) {
        return ret;
    }

    /* Only treat image as dirty if the header was updated successfully */
    s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
    return 0;
}

/*
 * Clears the dirty bit and flushes before if necessary.  Only call this
 * function when there are no pending requests, it does not guard against
 * concurrent requests dirtying the image.
 */
static int qcow2_mark_clean(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;

    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
        int ret;

        s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;

        ret = qcow2_flush_caches(bs);
        if (ret < 0) {
            return ret;
        }

        return qcow2_update_header(bs);
    }
    return 0;
}

/*
 * Marks the image as corrupt.
 */
int qcow2_mark_corrupt(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;

    s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
    return qcow2_update_header(bs);
}

/*
 * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
 * before if necessary.
 */
int qcow2_mark_consistent(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;

    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
        int ret = qcow2_flush_caches(bs);
        if (ret < 0) {
            return ret;
        }

        s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
        return qcow2_update_header(bs);
    }
    return 0;
}

static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
                                              BdrvCheckResult *result,
                                              BdrvCheckMode fix)
{
    int ret = qcow2_check_refcounts(bs, result, fix);
    if (ret < 0) {
        return ret;
    }

    if (fix && result->check_errors == 0 && result->corruptions == 0) {
        ret = qcow2_mark_clean(bs);
        if (ret < 0) {
            return ret;
        }
        return qcow2_mark_consistent(bs);
    }
    return ret;
}

static int coroutine_fn qcow2_co_check(BlockDriverState *bs,
                                       BdrvCheckResult *result,
                                       BdrvCheckMode fix)
{
    BDRVQcow2State *s = bs->opaque;
    int ret;

    qemu_co_mutex_lock(&s->lock);
    ret = qcow2_co_check_locked(bs, result, fix);
    qemu_co_mutex_unlock(&s->lock);
    return ret;
}

int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
                         uint64_t entries, size_t entry_len,
                         int64_t max_size_bytes, const char *table_name,
                         Error **errp)
{
    BDRVQcow2State *s = bs->opaque;

    if (entries > max_size_bytes / entry_len) {
        error_setg(errp, "%s too large", table_name);
        return -EFBIG;
    }

    /* Use signed INT64_MAX as the maximum even for uint64_t header fields,
     * because values will be passed to qemu functions taking int64_t. */
    if ((INT64_MAX - entries * entry_len < offset) ||
        (offset_into_cluster(s, offset) != 0)) {
        error_setg(errp, "%s offset invalid", table_name);
        return -EINVAL;
    }

    return 0;
}

static QemuOptsList qcow2_runtime_opts = {
    .name = "qcow2",
    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
    .desc = {
        {
            .name = QCOW2_OPT_LAZY_REFCOUNTS,
            .type = QEMU_OPT_BOOL,
            .help = "Postpone refcount updates",
        },
        {
            .name = QCOW2_OPT_DISCARD_REQUEST,
            .type = QEMU_OPT_BOOL,
            .help = "Pass guest discard requests to the layer below",
        },
        {
            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when snapshot related space "
                    "is freed",
        },
        {
            .name = QCOW2_OPT_DISCARD_OTHER,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when other clusters are freed",
        },
        {
            .name = QCOW2_OPT_OVERLAP,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        {
            .name = QCOW2_OPT_OVERLAP_TEMPLATE,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        {
            .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the main qcow2 header",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the active L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an active L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the refcount table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into a refcount block",
        },
        {
            .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the snapshot table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the bitmap directory",
        },
        {
            .name = QCOW2_OPT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum combined metadata (L2 tables and refcount blocks) "
                    "cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum L2 table cache size",
        },
        {
            .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Size of each entry in the L2 cache",
        },
        {
            .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Maximum refcount block cache size",
        },
        {
            .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
            .type = QEMU_OPT_NUMBER,
            .help = "Clean unused cache entries after this time (in seconds)",
        },
        BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.",
            "ID of secret providing qcow2 AES key or LUKS passphrase"),
        { /* end of list */ }
    },
};

static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
    [QCOW2_OL_MAIN_HEADER_BITNR]      = QCOW2_OPT_OVERLAP_MAIN_HEADER,
    [QCOW2_OL_ACTIVE_L1_BITNR]        = QCOW2_OPT_OVERLAP_ACTIVE_L1,
    [QCOW2_OL_ACTIVE_L2_BITNR]        = QCOW2_OPT_OVERLAP_ACTIVE_L2,
    [QCOW2_OL_REFCOUNT_TABLE_BITNR]   = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    [QCOW2_OL_REFCOUNT_BLOCK_BITNR]   = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    [QCOW2_OL_SNAPSHOT_TABLE_BITNR]   = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    [QCOW2_OL_INACTIVE_L1_BITNR]      = QCOW2_OPT_OVERLAP_INACTIVE_L1,
    [QCOW2_OL_INACTIVE_L2_BITNR]      = QCOW2_OPT_OVERLAP_INACTIVE_L2,
    [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
};

static void cache_clean_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    BDRVQcow2State *s = bs->opaque;
    qcow2_cache_clean_unused(s->l2_table_cache);
    qcow2_cache_clean_unused(s->refcount_block_cache);
    timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
              (int64_t) s->cache_clean_interval * 1000);
}

static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
{
    BDRVQcow2State *s = bs->opaque;
    if (s->cache_clean_interval > 0) {
        s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
                                             SCALE_MS, cache_clean_timer_cb,
                                             bs);
        timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                  (int64_t) s->cache_clean_interval * 1000);
    }
}

static void cache_clean_timer_del(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    if (s->cache_clean_timer) {
        timer_del(s->cache_clean_timer);
        timer_free(s->cache_clean_timer);
        s->cache_clean_timer = NULL;
    }
}

static void qcow2_detach_aio_context(BlockDriverState *bs)
{
    cache_clean_timer_del(bs);
}

static void qcow2_attach_aio_context(BlockDriverState *bs,
                                     AioContext *new_context)
{
    cache_clean_timer_init(bs, new_context);
}

static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
                             uint64_t *l2_cache_size,
                             uint64_t *l2_cache_entry_size,
                             uint64_t *refcount_cache_size, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t combined_cache_size;
    bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
    int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;

    combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
    l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
    refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);

    combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
    *l2_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 0);
    *refcount_cache_size = qemu_opt_get_size(opts,
                                             QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);

    *l2_cache_entry_size = qemu_opt_get_size(
        opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);

    if (combined_cache_size_set) {
        if (l2_cache_size_set && refcount_cache_size_set) {
            error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
                       " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
                       "at the same time");
            return;
        } else if (*l2_cache_size > combined_cache_size) {
            error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
                       QCOW2_OPT_CACHE_SIZE);
            return;
        } else if (*refcount_cache_size > combined_cache_size) {
            error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
                       QCOW2_OPT_CACHE_SIZE);
            return;
        }

        if (l2_cache_size_set) {
            *refcount_cache_size = combined_cache_size - *l2_cache_size;
        } else if (refcount_cache_size_set) {
            *l2_cache_size = combined_cache_size - *refcount_cache_size;
        } else {
            uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
            uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);

            /* Assign as much memory as possible to the L2 cache, and
             * use the remainder for the refcount cache */
            if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
                *l2_cache_size = max_l2_cache;
                *refcount_cache_size = combined_cache_size - *l2_cache_size;
            } else {
                *refcount_cache_size =
                    MIN(combined_cache_size, min_refcount_cache);
                *l2_cache_size = combined_cache_size - *refcount_cache_size;
            }
        }
    } else {
        if (!l2_cache_size_set) {
            *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE,
                                 (uint64_t)DEFAULT_L2_CACHE_CLUSTERS
                                 * s->cluster_size);
        }
        if (!refcount_cache_size_set) {
            *refcount_cache_size = min_refcount_cache;
        }
    }

    if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
        *l2_cache_entry_size > s->cluster_size ||
        !is_power_of_2(*l2_cache_entry_size)) {
        error_setg(errp, "L2 cache entry size must be a power of two "
                   "between %d and the cluster size (%d)",
                   1 << MIN_CLUSTER_BITS, s->cluster_size);
        return;
    }
}

typedef struct Qcow2ReopenState {
    Qcow2Cache *l2_table_cache;
    Qcow2Cache *refcount_block_cache;
    int l2_slice_size; /* Number of entries in a slice of the L2 table */
    bool use_lazy_refcounts;
    int overlap_check;
    bool discard_passthrough[QCOW2_DISCARD_MAX];
    uint64_t cache_clean_interval;
    QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
} Qcow2ReopenState;

static int qcow2_update_options_prepare(BlockDriverState *bs,
                                        Qcow2ReopenState *r,
                                        QDict *options, int flags,
                                        Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    QemuOpts *opts = NULL;
    const char *opt_overlap_check, *opt_overlap_check_template;
    int overlap_check_template = 0;
    uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
    int i;
    const char *encryptfmt;
    QDict *encryptopts = NULL;
    Error *local_err = NULL;
    int ret;

    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
    encryptfmt = qdict_get_try_str(encryptopts, "format");

    opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* get L2 table/refcount block cache size from command line options */
    read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
                     &refcount_cache_size, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    l2_cache_size /= l2_cache_entry_size;
    if (l2_cache_size < MIN_L2_CACHE_SIZE) {
        l2_cache_size = MIN_L2_CACHE_SIZE;
    }
    if (l2_cache_size > INT_MAX) {
        error_setg(errp, "L2 cache size too big");
        ret = -EINVAL;
        goto fail;
    }

    refcount_cache_size /= s->cluster_size;
    if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
        refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
    }
    if (refcount_cache_size > INT_MAX) {
        error_setg(errp, "Refcount cache size too big");
        ret = -EINVAL;
        goto fail;
    }

    /* alloc new L2 table/refcount block cache, flush old one */
    if (s->l2_table_cache) {
        ret = qcow2_cache_flush(bs, s->l2_table_cache);
        if (ret) {
            error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
            goto fail;
        }
    }

    if (s->refcount_block_cache) {
        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
        if (ret) {
            error_setg_errno(errp, -ret,
                             "Failed to flush the refcount block cache");
            goto fail;
        }
    }

    r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t);
    r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
                                           l2_cache_entry_size);
    r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
                                                 s->cluster_size);
    if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
        error_setg(errp, "Could not allocate metadata caches");
        ret = -ENOMEM;
        goto fail;
    }

    /* New interval for cache cleanup timer */
    r->cache_clean_interval =
        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
                            s->cache_clean_interval);
#ifndef CONFIG_LINUX
    if (r->cache_clean_interval != 0) {
        error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL
                   " not supported on this host");
        ret = -EINVAL;
        goto fail;
    }
#endif
    if (r->cache_clean_interval > UINT_MAX) {
        error_setg(errp, "Cache clean interval too big");
        ret = -EINVAL;
        goto fail;
    }

    /* lazy-refcounts; flush if going from enabled to disabled */
    r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
    if (r->use_lazy_refcounts && s->qcow_version < 3) {
        error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
                   "qemu 1.1 compatibility level");
        ret = -EINVAL;
        goto fail;
    }

    if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
        ret = qcow2_mark_clean(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
            goto fail;
        }
    }

    /* Overlap check options */
    opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
    opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
    if (opt_overlap_check_template && opt_overlap_check &&
        strcmp(opt_overlap_check_template, opt_overlap_check))
    {
        error_setg(errp, "Conflicting values for qcow2 options '"
                   QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
                   "' ('%s')", opt_overlap_check, opt_overlap_check_template);
        ret = -EINVAL;
        goto fail;
    }
    if (!opt_overlap_check) {
        opt_overlap_check = opt_overlap_check_template ?: "cached";
    }

    if (!strcmp(opt_overlap_check, "none")) {
        overlap_check_template = 0;
    } else if (!strcmp(opt_overlap_check, "constant")) {
        overlap_check_template = QCOW2_OL_CONSTANT;
    } else if (!strcmp(opt_overlap_check, "cached")) {
        overlap_check_template = QCOW2_OL_CACHED;
    } else if (!strcmp(opt_overlap_check, "all")) {
        overlap_check_template = QCOW2_OL_ALL;
    } else {
        error_setg(errp, "Unsupported value '%s' for qcow2 option "
                   "'overlap-check'. Allowed are any of the following: "
                   "none, constant, cached, all", opt_overlap_check);
        ret = -EINVAL;
        goto fail;
    }

    r->overlap_check = 0;
    for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
        /* overlap-check defines a template bitmask, but every flag may be
         * overwritten through the associated boolean option */
        r->overlap_check |=
            qemu_opt_get_bool(opts, overlap_bool_option_names[i],
                              overlap_check_template & (1 << i)) << i;
    }

    r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
    r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
    r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
                          flags & BDRV_O_UNMAP);
    r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
    r->discard_passthrough[QCOW2_DISCARD_OTHER] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);

    switch (s->crypt_method_header) {
    case QCOW_CRYPT_NONE:
        if (encryptfmt) {
            error_setg(errp, "No encryption in image header, but options "
                       "specified format '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        break;

    case QCOW_CRYPT_AES:
        if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
            error_setg(errp,
                       "Header reported 'aes' encryption format but "
                       "options specify '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        qdict_put_str(encryptopts, "format", "qcow");
        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
        break;

    case QCOW_CRYPT_LUKS:
        if (encryptfmt && !g_str_equal(encryptfmt, "luks")) {
            error_setg(errp,
                       "Header reported 'luks' encryption format but "
                       "options specify '%s'", encryptfmt);
            ret = -EINVAL;
            goto fail;
        }
        qdict_put_str(encryptopts, "format", "luks");
        r->crypto_opts = block_crypto_open_opts_init(encryptopts, errp);
        break;

    default:
        error_setg(errp, "Unsupported encryption method %d",
                   s->crypt_method_header);
        break;
    }
    if (s->crypt_method_header != QCOW_CRYPT_NONE && !r->crypto_opts) {
        ret = -EINVAL;
        goto fail;
    }

    ret = 0;
fail:
    qobject_unref(encryptopts);
    qemu_opts_del(opts);
    opts = NULL;
    return ret;
}

static void qcow2_update_options_commit(BlockDriverState *bs,
                                        Qcow2ReopenState *r)
{
    BDRVQcow2State *s = bs->opaque;
    int i;

    if (s->l2_table_cache) {
        qcow2_cache_destroy(s->l2_table_cache);
    }
    if (s->refcount_block_cache) {
        qcow2_cache_destroy(s->refcount_block_cache);
    }
    s->l2_table_cache = r->l2_table_cache;
    s->refcount_block_cache = r->refcount_block_cache;
    s->l2_slice_size = r->l2_slice_size;

    s->overlap_check = r->overlap_check;
    s->use_lazy_refcounts = r->use_lazy_refcounts;

    for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
        s->discard_passthrough[i] = r->discard_passthrough[i];
    }

    if (s->cache_clean_interval != r->cache_clean_interval) {
        cache_clean_timer_del(bs);
        s->cache_clean_interval = r->cache_clean_interval;
        cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
    }

    qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
    s->crypto_opts = r->crypto_opts;
}

static void qcow2_update_options_abort(BlockDriverState *bs,
                                       Qcow2ReopenState *r)
{
    if (r->l2_table_cache) {
        qcow2_cache_destroy(r->l2_table_cache);
    }
    if (r->refcount_block_cache) {
        qcow2_cache_destroy(r->refcount_block_cache);
    }
    qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
}

static int qcow2_update_options(BlockDriverState *bs, QDict *options,
                                int flags, Error **errp)
{
    Qcow2ReopenState r = {};
    int ret;

    ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
    if (ret >= 0) {
        qcow2_update_options_commit(bs, &r);
    } else {
        qcow2_update_options_abort(bs, &r);
    }

    return ret;
}

/* Called with s->lock held.  */
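/*
 * Reads the image header and all header extensions, validates the refcount
 * table, snapshot table and active L1 table, and initialises the driver
 * state.  qcow2_open() runs this in a coroutine via qcow2_open_entry().
 */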
static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
                                      int flags, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int len, i;
    int ret = 0;
    QCowHeader header;
    Error *local_err = NULL;
    uint64_t ext_end;
    uint64_t l1_vm_state_index;
    bool update_header = false;
    bool header_updated = false;

    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read qcow2 header");
        goto fail;
    }
    be32_to_cpus(&header.magic);
    be32_to_cpus(&header.version);
    be64_to_cpus(&header.backing_file_offset);
    be32_to_cpus(&header.backing_file_size);
    be64_to_cpus(&header.size);
    be32_to_cpus(&header.cluster_bits);
    be32_to_cpus(&header.crypt_method);
    be64_to_cpus(&header.l1_table_offset);
    be32_to_cpus(&header.l1_size);
    be64_to_cpus(&header.refcount_table_offset);
    be32_to_cpus(&header.refcount_table_clusters);
    be64_to_cpus(&header.snapshots_offset);
    be32_to_cpus(&header.nb_snapshots);

    if (header.magic != QCOW_MAGIC) {
        error_setg(errp, "Image is not in qcow2 format");
        ret = -EINVAL;
        goto fail;
    }
    if (header.version < 2 || header.version > 3) {
        error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
        ret = -ENOTSUP;
        goto fail;
    }

    s->qcow_version = header.version;

    /* Initialise cluster size */
    if (header.cluster_bits < MIN_CLUSTER_BITS ||
        header.cluster_bits > MAX_CLUSTER_BITS) {
        error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
                   header.cluster_bits);
        ret = -EINVAL;
        goto fail;
    }

    s->cluster_bits = header.cluster_bits;
    s->cluster_size = 1 << s->cluster_bits;
    s->cluster_sectors = 1 << (s->cluster_bits - BDRV_SECTOR_BITS);

    /* Initialise version 3 header fields */
    if (header.version == 2) {
        header.incompatible_features = 0;
        header.compatible_features = 0;
        header.autoclear_features = 0;
        header.refcount_order = 4;
        header.header_length = 72;
    } else {
        be64_to_cpus(&header.incompatible_features);
        be64_to_cpus(&header.compatible_features);
        be64_to_cpus(&header.autoclear_features);
        be32_to_cpus(&header.refcount_order);
        be32_to_cpus(&header.header_length);

        if (header.header_length < 104) {
            error_setg(errp, "qcow2 header too short");
            ret = -EINVAL;
            goto fail;
        }
    }

    if (header.header_length > s->cluster_size) {
        error_setg(errp, "qcow2 header exceeds cluster size");
        ret = -EINVAL;
        goto fail;
    }

    if (header.header_length > sizeof(header)) {
        s->unknown_header_fields_size = header.header_length - sizeof(header);
        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
                         s->unknown_header_fields_size);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
                             "fields");
            goto fail;
        }
    }

    if (header.backing_file_offset > s->cluster_size) {
        error_setg(errp, "Invalid backing file offset");
        ret = -EINVAL;
        goto fail;
    }

    if (header.backing_file_offset) {
        ext_end = header.backing_file_offset;
    } else {
        ext_end = 1 << header.cluster_bits;
    }

    /* Handle feature bits */
    s->incompatible_features = header.incompatible_features;
    s->compatible_features = header.compatible_features;
    s->autoclear_features = header.autoclear_features;

    if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
        void *feature_table = NULL;
        qcow2_read_extensions(bs, header.header_length, ext_end,
                              &feature_table, flags, NULL, NULL);
        report_unsupported_feature(errp, feature_table,
                                   s->incompatible_features &
                                   ~QCOW2_INCOMPAT_MASK);
        ret = -ENOTSUP;
        g_free(feature_table);
        goto fail;
    }

    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
        /* Corrupt images may not be written to unless they are being repaired
         */
        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
            error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
                       "read/write");
            ret = -EACCES;
            goto fail;
        }
    }

    /* Check support for various header values */
    if (header.refcount_order > 6) {
        error_setg(errp, "Reference count entry width too large; may not "
                   "exceed 64 bits");
        ret = -EINVAL;
        goto fail;
    }
    s->refcount_order = header.refcount_order;
    s->refcount_bits = 1 << s->refcount_order;
    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
    s->refcount_max += s->refcount_max - 1;

    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
        if (bdrv_uses_whitelist() &&
            s->crypt_method_header == QCOW_CRYPT_AES) {
            error_setg(errp,
                       "Use of AES-CBC encrypted qcow2 images is no longer "
                       "supported in system emulators");
            error_append_hint(errp,
                              "You can use 'qemu-img convert' to convert your "
                              "image to an alternative supported format, such "
                              "as unencrypted qcow2, or raw with the LUKS "
                              "format instead.\n");
            ret = -ENOSYS;
            goto fail;
        }

        if (s->crypt_method_header == QCOW_CRYPT_AES) {
            s->crypt_physical_offset = false;
        } else {
            /* Assuming LUKS and any future crypt methods we
             * add will all use physical offsets, due to the
             * fact that the alternative is insecure...  */
            s->crypt_physical_offset = true;
        }

        bs->encrypted = true;
    }

    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
    s->l2_size = 1 << s->l2_bits;
    /* 2^(s->refcount_order - 3) is the refcount width in bytes */
    s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
    s->refcount_block_size = 1 << s->refcount_block_bits;
    bs->total_sectors = header.size / 512;
    s->csize_shift = (62 - (s->cluster_bits - 8));
    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;

    s->refcount_table_offset = header.refcount_table_offset;
    s->refcount_table_size =
        header.refcount_table_clusters << (s->cluster_bits - 3);

    if (header.refcount_table_clusters == 0 && !(flags & BDRV_O_CHECK)) {
        error_setg(errp, "Image does not contain a reference count table");
        ret = -EINVAL;
        goto fail;
    }

    ret = qcow2_validate_table(bs, s->refcount_table_offset,
                               header.refcount_table_clusters,
                               s->cluster_size, QCOW_MAX_REFTABLE_SIZE,
                               "Reference count table", errp);
    if (ret < 0) {
        goto fail;
    }

    /* The total size in bytes of the snapshot table is checked in
     * qcow2_read_snapshots() because the size of each snapshot is
     * variable and we don't know it yet.
     * Here we only check the offset and number of snapshots. */
    ret = qcow2_validate_table(bs, header.snapshots_offset,
                               header.nb_snapshots,
                               sizeof(QCowSnapshotHeader),
                               sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS,
                               "Snapshot table", errp);
    if (ret < 0) {
        goto fail;
    }

    /* read the level 1 table */
    ret = qcow2_validate_table(bs, header.l1_table_offset,
                               header.l1_size, sizeof(uint64_t),
                               QCOW_MAX_L1_SIZE, "Active L1 table", errp);
    if (ret < 0) {
        goto fail;
    }
    s->l1_size = header.l1_size;
    s->l1_table_offset = header.l1_table_offset;

    l1_vm_state_index = size_to_l1(s, header.size);
    if (l1_vm_state_index > INT_MAX) {
        error_setg(errp, "Image is too big");
        ret = -EFBIG;
        goto fail;
    }
    s->l1_vm_state_index = l1_vm_state_index;

    /* the L1 table must contain at least enough entries to put
       header.size bytes */
    if (s->l1_size < s->l1_vm_state_index) {
        error_setg(errp, "L1 table is too small");
        ret = -EINVAL;
        goto fail;
    }

    if (s->l1_size > 0) {
        s->l1_table = qemu_try_blockalign(bs->file->bs,
            ROUND_UP(s->l1_size * sizeof(uint64_t), 512));
        if (s->l1_table == NULL) {
            error_setg(errp, "Could not allocate L1 table");
            ret = -ENOMEM;
            goto fail;
        }
        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
                         s->l1_size * sizeof(uint64_t));
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read L1 table");
            goto fail;
        }
        for (i = 0; i < s->l1_size; i++) {
            be64_to_cpus(&s->l1_table[i]);
        }
    }

    /* Parse driver-specific options */
    ret = qcow2_update_options(bs, options, flags, errp);
    if (ret < 0) {
        goto fail;
    }

    s->cluster_cache_offset = -1;
    s->flags = flags;

    ret = qcow2_refcount_init(bs);
    if (ret != 0) {
        error_setg_errno(errp, -ret, "Could not initialize refcount handling");
        goto fail;
    }

    QLIST_INIT(&s->cluster_allocs);
    QTAILQ_INIT(&s->discards);

    /* read qcow2 extensions */
    if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
                              flags, &update_header, &local_err)) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* qcow2_read_extension may have set up the crypto context
     * if the crypt method needs a header region, some methods
     * don't need header extensions, so must check here
     */
    if (s->crypt_method_header && !s->crypto) {
        if (s->crypt_method_header == QCOW_CRYPT_AES) {
            unsigned int cflags = 0;
            if (flags & BDRV_O_NO_IO) {
                cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
            }
            s->crypto = qcrypto_block_open(s->crypto_opts, "encrypt.",
                                           NULL, NULL, cflags, errp);
            if (!s->crypto) {
                ret = -EINVAL;
                goto fail;
            }
        } else if (!(flags & BDRV_O_NO_IO)) {
            error_setg(errp, "Missing CRYPTO header for crypt method %d",
                       s->crypt_method_header);
            ret = -EINVAL;
            goto fail;
        }
    }

    /* read the backing file name */
    if (header.backing_file_offset != 0) {
        len = header.backing_file_size;
        if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
            len >= sizeof(bs->backing_file)) {
            error_setg(errp, "Backing file name too long");
            ret = -EINVAL;
            goto fail;
        }
        ret = bdrv_pread(bs->file, header.backing_file_offset,
                         bs->backing_file, len);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not read backing file name");
            goto fail;
        }
        bs->backing_file[len] = '\0';
        s->image_backing_file = g_strdup(bs->backing_file);
    }

    /* Internal snapshots */
    s->snapshots_offset = header.snapshots_offset;
    s->nb_snapshots = header.nb_snapshots;

    ret = qcow2_read_snapshots(bs);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read snapshots");
        goto fail;
    }

    /* Clear unknown autoclear feature bits */
    update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
    update_header =
        update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE);
    if (update_header) {
        s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
    }

    if (s->dirty_bitmaps_loaded) {
        /* It's some kind of reopen. There are no known cases where we need to
         * reload bitmaps in such a situation, so it's safer to skip them.
         *
         * Moreover, if we have some readonly bitmaps and we are reopening for
         * rw we should reopen bitmaps correspondingly.
         */
        if (bdrv_has_readonly_bitmaps(bs) &&
            !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE))
        {
            qcow2_reopen_bitmaps_rw_hint(bs, &header_updated, &local_err);
        }
    } else {
        header_updated = qcow2_load_dirty_bitmaps(bs, &local_err);
        s->dirty_bitmaps_loaded = true;
    }
    update_header = update_header && !header_updated;
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    if (update_header) {
        ret = qcow2_update_header(bs);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not update qcow2 header");
            goto fail;
        }
    }

    bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP : 0;

    /* Repair image if dirty */
    if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
        (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
        BdrvCheckResult result = {0};

        ret = qcow2_co_check_locked(bs, &result,
                                    BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
        if (ret < 0 || result.check_errors) {
            if (ret >= 0) {
                ret = -EIO;
            }
            error_setg_errno(errp, -ret, "Could not repair dirty image");
            goto fail;
        }
    }

#ifdef DEBUG_ALLOC
    {
        BdrvCheckResult result = {0};
        qcow2_check_refcounts(bs, &result, 0);
    }
#endif

    qemu_co_queue_init(&s->compress_wait_queue);

    return ret;

 fail:
    g_free(s->unknown_header_fields);
    cleanup_unknown_header_ext(bs);
    qcow2_free_snapshots(bs);
    qcow2_refcount_close(bs);
    qemu_vfree(s->l1_table);
    /* else pre-write overlap checks in cache_destroy may crash */
    s->l1_table = NULL;
    cache_clean_timer_del(bs);
    if (s->l2_table_cache) {
        qcow2_cache_destroy(s->l2_table_cache);
    }
    if (s->refcount_block_cache) {
        qcow2_cache_destroy(s->refcount_block_cache);
    }
    qcrypto_block_free(s->crypto);
    qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
    return ret;
}

typedef struct QCow2OpenCo {
    BlockDriverState *bs;
    QDict *options;
    int flags;
    Error **errp;
    int ret;
} QCow2OpenCo;

static void coroutine_fn qcow2_open_entry(void *opaque)
{
    QCow2OpenCo *qoc = opaque;
    BDRVQcow2State *s = qoc->bs->opaque;

    qemu_co_mutex_lock(&s->lock);
    qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, qoc->errp);
    qemu_co_mutex_unlock(&s->lock);
}

static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
                      Error **errp)
{
    BDRVQcow2State *s = bs->opaque;
    QCow2OpenCo qoc = {
        .bs = bs,
        .options = options,
        .flags = flags,
        .errp = errp,
        .ret = -EINPROGRESS
    };

    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
                               false, errp);
    if (!bs->file) {
        return -EINVAL;
    }

    /* Initialise locks */
    qemu_co_mutex_init(&s->lock);

    if (qemu_in_coroutine()) {
        /* From bdrv_co_create.  */
        qcow2_open_entry(&qoc);
    } else {
        qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc));
        BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
    }
    return qoc.ret;
}

static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BDRVQcow2State *s = bs->opaque;

    if (bs->encrypted) {
        /* Encryption works on a sector granularity */
        bs->bl.request_alignment = BDRV_SECTOR_SIZE;
    }
    bs->bl.pwrite_zeroes_alignment = s->cluster_size;
    bs->bl.pdiscard_alignment = s->cluster_size;
}

static int qcow2_reopen_prepare(BDRVReopenState *state,
                                BlockReopenQueue *queue, Error **errp)
{
    Qcow2ReopenState *r;
    int ret;

    r = g_new0(Qcow2ReopenState, 1);
    state->opaque = r;

    ret = qcow2_update_options_prepare(state->bs, r, state->options,
                                       state->flags, errp);
    if (ret < 0) {
        goto fail;
    }

    /* We need to write out any unwritten data if we reopen read-only. */
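    /* This means storing persistent bitmaps read-only, flushing the node and
     * clearing the dirty flag in the header (qcow2_mark_clean()). */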
    if ((state->flags & BDRV_O_RDWR) == 0) {
        ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
        if (ret < 0) {
            goto fail;
        }

        ret = bdrv_flush(state->bs);
        if (ret < 0) {
            goto fail;
        }

        ret = qcow2_mark_clean(state->bs);
        if (ret < 0) {
            goto fail;
        }
    }

    return 0;

fail:
    qcow2_update_options_abort(state->bs, r);
    g_free(r);
    return ret;
}

static void qcow2_reopen_commit(BDRVReopenState *state)
{
    qcow2_update_options_commit(state->bs, state->opaque);
    g_free(state->opaque);
}

static void qcow2_reopen_abort(BDRVReopenState *state)
{
    qcow2_update_options_abort(state->bs, state->opaque);
    g_free(state->opaque);
}

static void qcow2_join_options(QDict *options, QDict *old_options)
{
    bool has_new_overlap_template =
        qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
        qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
    bool has_new_total_cache_size =
        qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
    bool has_all_cache_options;

    /* New overlap template overrides all old overlap options */
    if (has_new_overlap_template) {
        qdict_del(old_options, QCOW2_OPT_OVERLAP);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
        qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
    }

    /* New total cache size overrides all old options */
    if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
        qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
        qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
    }

    qdict_join(options, old_options, false);

    /*
     * If after merging all cache size options are set, an old total size is
     * overwritten. Do keep all options, however, if all three are new. The
     * resulting error message is what we want to happen.
     */
1728 */ 1729 has_all_cache_options = 1730 qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) || 1731 qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) || 1732 qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); 1733 1734 if (has_all_cache_options && !has_new_total_cache_size) { 1735 qdict_del(options, QCOW2_OPT_CACHE_SIZE); 1736 } 1737 } 1738 1739 static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, 1740 bool want_zero, 1741 int64_t offset, int64_t count, 1742 int64_t *pnum, int64_t *map, 1743 BlockDriverState **file) 1744 { 1745 BDRVQcow2State *s = bs->opaque; 1746 uint64_t cluster_offset; 1747 int index_in_cluster, ret; 1748 unsigned int bytes; 1749 int status = 0; 1750 1751 bytes = MIN(INT_MAX, count); 1752 qemu_co_mutex_lock(&s->lock); 1753 ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); 1754 qemu_co_mutex_unlock(&s->lock); 1755 if (ret < 0) { 1756 return ret; 1757 } 1758 1759 *pnum = bytes; 1760 1761 if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED && 1762 !s->crypto) { 1763 index_in_cluster = offset & (s->cluster_size - 1); 1764 *map = cluster_offset | index_in_cluster; 1765 *file = bs->file->bs; 1766 status |= BDRV_BLOCK_OFFSET_VALID; 1767 } 1768 if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) { 1769 status |= BDRV_BLOCK_ZERO; 1770 } else if (ret != QCOW2_CLUSTER_UNALLOCATED) { 1771 status |= BDRV_BLOCK_DATA; 1772 } 1773 return status; 1774 } 1775 1776 static coroutine_fn int qcow2_handle_l2meta(BlockDriverState *bs, 1777 QCowL2Meta **pl2meta, 1778 bool link_l2) 1779 { 1780 int ret = 0; 1781 QCowL2Meta *l2meta = *pl2meta; 1782 1783 while (l2meta != NULL) { 1784 QCowL2Meta *next; 1785 1786 if (link_l2) { 1787 ret = qcow2_alloc_cluster_link_l2(bs, l2meta); 1788 if (ret) { 1789 goto out; 1790 } 1791 } else { 1792 qcow2_alloc_cluster_abort(bs, l2meta); 1793 } 1794 1795 /* Take the request off the list of running requests */ 1796 if (l2meta->nb_clusters != 0) { 1797 QLIST_REMOVE(l2meta, next_in_flight); 1798 } 1799 1800 qemu_co_queue_restart_all(&l2meta->dependent_requests); 1801 1802 next = l2meta->next; 1803 g_free(l2meta); 1804 l2meta = next; 1805 } 1806 out: 1807 *pl2meta = l2meta; 1808 return ret; 1809 } 1810 1811 static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, 1812 uint64_t bytes, QEMUIOVector *qiov, 1813 int flags) 1814 { 1815 BDRVQcow2State *s = bs->opaque; 1816 int offset_in_cluster; 1817 int ret; 1818 unsigned int cur_bytes; /* number of bytes in current iteration */ 1819 uint64_t cluster_offset = 0; 1820 uint64_t bytes_done = 0; 1821 QEMUIOVector hd_qiov; 1822 uint8_t *cluster_data = NULL; 1823 1824 qemu_iovec_init(&hd_qiov, qiov->niov); 1825 1826 qemu_co_mutex_lock(&s->lock); 1827 1828 while (bytes != 0) { 1829 1830 /* prepare next request */ 1831 cur_bytes = MIN(bytes, INT_MAX); 1832 if (s->crypto) { 1833 cur_bytes = MIN(cur_bytes, 1834 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 1835 } 1836 1837 ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset); 1838 if (ret < 0) { 1839 goto fail; 1840 } 1841 1842 offset_in_cluster = offset_into_cluster(s, offset); 1843 1844 qemu_iovec_reset(&hd_qiov); 1845 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); 1846 1847 switch (ret) { 1848 case QCOW2_CLUSTER_UNALLOCATED: 1849 1850 if (bs->backing) { 1851 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); 1852 qemu_co_mutex_unlock(&s->lock); 1853 ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, 1854 &hd_qiov, 0); 1855 qemu_co_mutex_lock(&s->lock); 1856 if (ret < 0) { 1857 
goto fail; 1858 } 1859 } else { 1860 /* Note: in this case, no need to wait */ 1861 qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); 1862 } 1863 break; 1864 1865 case QCOW2_CLUSTER_ZERO_PLAIN: 1866 case QCOW2_CLUSTER_ZERO_ALLOC: 1867 qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); 1868 break; 1869 1870 case QCOW2_CLUSTER_COMPRESSED: 1871 /* add AIO support for compressed blocks ? */ 1872 ret = qcow2_decompress_cluster(bs, cluster_offset); 1873 if (ret < 0) { 1874 goto fail; 1875 } 1876 1877 qemu_iovec_from_buf(&hd_qiov, 0, 1878 s->cluster_cache + offset_in_cluster, 1879 cur_bytes); 1880 break; 1881 1882 case QCOW2_CLUSTER_NORMAL: 1883 if ((cluster_offset & 511) != 0) { 1884 ret = -EIO; 1885 goto fail; 1886 } 1887 1888 if (bs->encrypted) { 1889 assert(s->crypto); 1890 1891 /* 1892 * For encrypted images, read everything into a temporary 1893 * contiguous buffer on which the AES functions can work. 1894 */ 1895 if (!cluster_data) { 1896 cluster_data = 1897 qemu_try_blockalign(bs->file->bs, 1898 QCOW_MAX_CRYPT_CLUSTERS 1899 * s->cluster_size); 1900 if (cluster_data == NULL) { 1901 ret = -ENOMEM; 1902 goto fail; 1903 } 1904 } 1905 1906 assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 1907 qemu_iovec_reset(&hd_qiov); 1908 qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); 1909 } 1910 1911 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); 1912 qemu_co_mutex_unlock(&s->lock); 1913 ret = bdrv_co_preadv(bs->file, 1914 cluster_offset + offset_in_cluster, 1915 cur_bytes, &hd_qiov, 0); 1916 qemu_co_mutex_lock(&s->lock); 1917 if (ret < 0) { 1918 goto fail; 1919 } 1920 if (bs->encrypted) { 1921 assert(s->crypto); 1922 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); 1923 assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0); 1924 if (qcrypto_block_decrypt(s->crypto, 1925 (s->crypt_physical_offset ? 
1926 cluster_offset + offset_in_cluster : 1927 offset), 1928 cluster_data, 1929 cur_bytes, 1930 NULL) < 0) { 1931 ret = -EIO; 1932 goto fail; 1933 } 1934 qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes); 1935 } 1936 break; 1937 1938 default: 1939 g_assert_not_reached(); 1940 ret = -EIO; 1941 goto fail; 1942 } 1943 1944 bytes -= cur_bytes; 1945 offset += cur_bytes; 1946 bytes_done += cur_bytes; 1947 } 1948 ret = 0; 1949 1950 fail: 1951 qemu_co_mutex_unlock(&s->lock); 1952 1953 qemu_iovec_destroy(&hd_qiov); 1954 qemu_vfree(cluster_data); 1955 1956 return ret; 1957 } 1958 1959 /* Check if it's possible to merge a write request with the writing of 1960 * the data from the COW regions */ 1961 static bool merge_cow(uint64_t offset, unsigned bytes, 1962 QEMUIOVector *hd_qiov, QCowL2Meta *l2meta) 1963 { 1964 QCowL2Meta *m; 1965 1966 for (m = l2meta; m != NULL; m = m->next) { 1967 /* If both COW regions are empty then there's nothing to merge */ 1968 if (m->cow_start.nb_bytes == 0 && m->cow_end.nb_bytes == 0) { 1969 continue; 1970 } 1971 1972 /* The data (middle) region must be immediately after the 1973 * start region */ 1974 if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) { 1975 continue; 1976 } 1977 1978 /* The end region must be immediately after the data (middle) 1979 * region */ 1980 if (m->offset + m->cow_end.offset != offset + bytes) { 1981 continue; 1982 } 1983 1984 /* Make sure that adding both COW regions to the QEMUIOVector 1985 * does not exceed IOV_MAX */ 1986 if (hd_qiov->niov > IOV_MAX - 2) { 1987 continue; 1988 } 1989 1990 m->data_qiov = hd_qiov; 1991 return true; 1992 } 1993 1994 return false; 1995 } 1996 1997 static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, 1998 uint64_t bytes, QEMUIOVector *qiov, 1999 int flags) 2000 { 2001 BDRVQcow2State *s = bs->opaque; 2002 int offset_in_cluster; 2003 int ret; 2004 unsigned int cur_bytes; /* number of sectors in current iteration */ 2005 uint64_t cluster_offset; 2006 QEMUIOVector hd_qiov; 2007 uint64_t bytes_done = 0; 2008 uint8_t *cluster_data = NULL; 2009 QCowL2Meta *l2meta = NULL; 2010 2011 trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes); 2012 2013 qemu_iovec_init(&hd_qiov, qiov->niov); 2014 2015 s->cluster_cache_offset = -1; /* disable compressed cache */ 2016 2017 qemu_co_mutex_lock(&s->lock); 2018 2019 while (bytes != 0) { 2020 2021 l2meta = NULL; 2022 2023 trace_qcow2_writev_start_part(qemu_coroutine_self()); 2024 offset_in_cluster = offset_into_cluster(s, offset); 2025 cur_bytes = MIN(bytes, INT_MAX); 2026 if (bs->encrypted) { 2027 cur_bytes = MIN(cur_bytes, 2028 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size 2029 - offset_in_cluster); 2030 } 2031 2032 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, 2033 &cluster_offset, &l2meta); 2034 if (ret < 0) { 2035 goto fail; 2036 } 2037 2038 assert((cluster_offset & 511) == 0); 2039 2040 qemu_iovec_reset(&hd_qiov); 2041 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); 2042 2043 if (bs->encrypted) { 2044 assert(s->crypto); 2045 if (!cluster_data) { 2046 cluster_data = qemu_try_blockalign(bs->file->bs, 2047 QCOW_MAX_CRYPT_CLUSTERS 2048 * s->cluster_size); 2049 if (cluster_data == NULL) { 2050 ret = -ENOMEM; 2051 goto fail; 2052 } 2053 } 2054 2055 assert(hd_qiov.size <= 2056 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 2057 qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); 2058 2059 if (qcrypto_block_encrypt(s->crypto, 2060 (s->crypt_physical_offset ? 
2061 cluster_offset + offset_in_cluster : 2062 offset), 2063 cluster_data, 2064 cur_bytes, NULL) < 0) { 2065 ret = -EIO; 2066 goto fail; 2067 } 2068 2069 qemu_iovec_reset(&hd_qiov); 2070 qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); 2071 } 2072 2073 ret = qcow2_pre_write_overlap_check(bs, 0, 2074 cluster_offset + offset_in_cluster, cur_bytes); 2075 if (ret < 0) { 2076 goto fail; 2077 } 2078 2079 /* If we need to do COW, check if it's possible to merge the 2080 * writing of the guest data together with that of the COW regions. 2081 * If it's not possible (or not necessary) then write the 2082 * guest data now. */ 2083 if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) { 2084 qemu_co_mutex_unlock(&s->lock); 2085 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); 2086 trace_qcow2_writev_data(qemu_coroutine_self(), 2087 cluster_offset + offset_in_cluster); 2088 ret = bdrv_co_pwritev(bs->file, 2089 cluster_offset + offset_in_cluster, 2090 cur_bytes, &hd_qiov, 0); 2091 qemu_co_mutex_lock(&s->lock); 2092 if (ret < 0) { 2093 goto fail; 2094 } 2095 } 2096 2097 ret = qcow2_handle_l2meta(bs, &l2meta, true); 2098 if (ret) { 2099 goto fail; 2100 } 2101 2102 bytes -= cur_bytes; 2103 offset += cur_bytes; 2104 bytes_done += cur_bytes; 2105 trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes); 2106 } 2107 ret = 0; 2108 2109 fail: 2110 qcow2_handle_l2meta(bs, &l2meta, false); 2111 2112 qemu_co_mutex_unlock(&s->lock); 2113 2114 qemu_iovec_destroy(&hd_qiov); 2115 qemu_vfree(cluster_data); 2116 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); 2117 2118 return ret; 2119 } 2120 2121 static int qcow2_inactivate(BlockDriverState *bs) 2122 { 2123 BDRVQcow2State *s = bs->opaque; 2124 int ret, result = 0; 2125 Error *local_err = NULL; 2126 2127 qcow2_store_persistent_dirty_bitmaps(bs, &local_err); 2128 if (local_err != NULL) { 2129 result = -EINVAL; 2130 error_report_err(local_err); 2131 error_report("Persistent bitmaps are lost for node '%s'", 2132 bdrv_get_device_or_node_name(bs)); 2133 } 2134 2135 ret = qcow2_cache_flush(bs, s->l2_table_cache); 2136 if (ret) { 2137 result = ret; 2138 error_report("Failed to flush the L2 table cache: %s", 2139 strerror(-ret)); 2140 } 2141 2142 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 2143 if (ret) { 2144 result = ret; 2145 error_report("Failed to flush the refcount block cache: %s", 2146 strerror(-ret)); 2147 } 2148 2149 if (result == 0) { 2150 qcow2_mark_clean(bs); 2151 } 2152 2153 return result; 2154 } 2155 2156 static void qcow2_close(BlockDriverState *bs) 2157 { 2158 BDRVQcow2State *s = bs->opaque; 2159 qemu_vfree(s->l1_table); 2160 /* else pre-write overlap checks in cache_destroy may crash */ 2161 s->l1_table = NULL; 2162 2163 if (!(s->flags & BDRV_O_INACTIVE)) { 2164 qcow2_inactivate(bs); 2165 } 2166 2167 cache_clean_timer_del(bs); 2168 qcow2_cache_destroy(s->l2_table_cache); 2169 qcow2_cache_destroy(s->refcount_block_cache); 2170 2171 qcrypto_block_free(s->crypto); 2172 s->crypto = NULL; 2173 2174 g_free(s->unknown_header_fields); 2175 cleanup_unknown_header_ext(bs); 2176 2177 g_free(s->image_backing_file); 2178 g_free(s->image_backing_format); 2179 2180 g_free(s->cluster_cache); 2181 qemu_vfree(s->cluster_data); 2182 qcow2_refcount_close(bs); 2183 qcow2_free_snapshots(bs); 2184 } 2185 2186 static void coroutine_fn qcow2_co_invalidate_cache(BlockDriverState *bs, 2187 Error **errp) 2188 { 2189 BDRVQcow2State *s = bs->opaque; 2190 int flags = s->flags; 2191 QCryptoBlock *crypto = NULL; 2192 QDict *options; 2193 Error *local_err = NULL; 2194 int 
ret; 2195 2196 /* 2197 * Backing files are read-only which makes all of their metadata immutable, 2198 * that means we don't have to worry about reopening them here. 2199 */ 2200 2201 crypto = s->crypto; 2202 s->crypto = NULL; 2203 2204 qcow2_close(bs); 2205 2206 memset(s, 0, sizeof(BDRVQcow2State)); 2207 options = qdict_clone_shallow(bs->options); 2208 2209 flags &= ~BDRV_O_INACTIVE; 2210 qemu_co_mutex_lock(&s->lock); 2211 ret = qcow2_do_open(bs, options, flags, &local_err); 2212 qemu_co_mutex_unlock(&s->lock); 2213 qobject_unref(options); 2214 if (local_err) { 2215 error_propagate(errp, local_err); 2216 error_prepend(errp, "Could not reopen qcow2 layer: "); 2217 bs->drv = NULL; 2218 return; 2219 } else if (ret < 0) { 2220 error_setg_errno(errp, -ret, "Could not reopen qcow2 layer"); 2221 bs->drv = NULL; 2222 return; 2223 } 2224 2225 s->crypto = crypto; 2226 } 2227 2228 static size_t header_ext_add(char *buf, uint32_t magic, const void *s, 2229 size_t len, size_t buflen) 2230 { 2231 QCowExtension *ext_backing_fmt = (QCowExtension*) buf; 2232 size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7); 2233 2234 if (buflen < ext_len) { 2235 return -ENOSPC; 2236 } 2237 2238 *ext_backing_fmt = (QCowExtension) { 2239 .magic = cpu_to_be32(magic), 2240 .len = cpu_to_be32(len), 2241 }; 2242 2243 if (len) { 2244 memcpy(buf + sizeof(QCowExtension), s, len); 2245 } 2246 2247 return ext_len; 2248 } 2249 2250 /* 2251 * Updates the qcow2 header, including the variable length parts of it, i.e. 2252 * the backing file name and all extensions. qcow2 was not designed to allow 2253 * such changes, so if we run out of space (we can only use the first cluster) 2254 * this function may fail. 2255 * 2256 * Returns 0 on success, -errno in error cases. 2257 */ 2258 int qcow2_update_header(BlockDriverState *bs) 2259 { 2260 BDRVQcow2State *s = bs->opaque; 2261 QCowHeader *header; 2262 char *buf; 2263 size_t buflen = s->cluster_size; 2264 int ret; 2265 uint64_t total_size; 2266 uint32_t refcount_table_clusters; 2267 size_t header_length; 2268 Qcow2UnknownHeaderExtension *uext; 2269 2270 buf = qemu_blockalign(bs, buflen); 2271 2272 /* Header structure */ 2273 header = (QCowHeader*) buf; 2274 2275 if (buflen < sizeof(*header)) { 2276 ret = -ENOSPC; 2277 goto fail; 2278 } 2279 2280 header_length = sizeof(*header) + s->unknown_header_fields_size; 2281 total_size = bs->total_sectors * BDRV_SECTOR_SIZE; 2282 refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); 2283 2284 *header = (QCowHeader) { 2285 /* Version 2 fields */ 2286 .magic = cpu_to_be32(QCOW_MAGIC), 2287 .version = cpu_to_be32(s->qcow_version), 2288 .backing_file_offset = 0, 2289 .backing_file_size = 0, 2290 .cluster_bits = cpu_to_be32(s->cluster_bits), 2291 .size = cpu_to_be64(total_size), 2292 .crypt_method = cpu_to_be32(s->crypt_method_header), 2293 .l1_size = cpu_to_be32(s->l1_size), 2294 .l1_table_offset = cpu_to_be64(s->l1_table_offset), 2295 .refcount_table_offset = cpu_to_be64(s->refcount_table_offset), 2296 .refcount_table_clusters = cpu_to_be32(refcount_table_clusters), 2297 .nb_snapshots = cpu_to_be32(s->nb_snapshots), 2298 .snapshots_offset = cpu_to_be64(s->snapshots_offset), 2299 2300 /* Version 3 fields */ 2301 .incompatible_features = cpu_to_be64(s->incompatible_features), 2302 .compatible_features = cpu_to_be64(s->compatible_features), 2303 .autoclear_features = cpu_to_be64(s->autoclear_features), 2304 .refcount_order = cpu_to_be32(s->refcount_order), 2305 .header_length = cpu_to_be32(header_length), 2306 }; 2307 2308 /* For 
older versions, write a shorter header */ 2309 switch (s->qcow_version) { 2310 case 2: 2311 ret = offsetof(QCowHeader, incompatible_features); 2312 break; 2313 case 3: 2314 ret = sizeof(*header); 2315 break; 2316 default: 2317 ret = -EINVAL; 2318 goto fail; 2319 } 2320 2321 buf += ret; 2322 buflen -= ret; 2323 memset(buf, 0, buflen); 2324 2325 /* Preserve any unknown field in the header */ 2326 if (s->unknown_header_fields_size) { 2327 if (buflen < s->unknown_header_fields_size) { 2328 ret = -ENOSPC; 2329 goto fail; 2330 } 2331 2332 memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size); 2333 buf += s->unknown_header_fields_size; 2334 buflen -= s->unknown_header_fields_size; 2335 } 2336 2337 /* Backing file format header extension */ 2338 if (s->image_backing_format) { 2339 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, 2340 s->image_backing_format, 2341 strlen(s->image_backing_format), 2342 buflen); 2343 if (ret < 0) { 2344 goto fail; 2345 } 2346 2347 buf += ret; 2348 buflen -= ret; 2349 } 2350 2351 /* Full disk encryption header pointer extension */ 2352 if (s->crypto_header.offset != 0) { 2353 cpu_to_be64s(&s->crypto_header.offset); 2354 cpu_to_be64s(&s->crypto_header.length); 2355 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_CRYPTO_HEADER, 2356 &s->crypto_header, sizeof(s->crypto_header), 2357 buflen); 2358 be64_to_cpus(&s->crypto_header.offset); 2359 be64_to_cpus(&s->crypto_header.length); 2360 if (ret < 0) { 2361 goto fail; 2362 } 2363 buf += ret; 2364 buflen -= ret; 2365 } 2366 2367 /* Feature table */ 2368 if (s->qcow_version >= 3) { 2369 Qcow2Feature features[] = { 2370 { 2371 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 2372 .bit = QCOW2_INCOMPAT_DIRTY_BITNR, 2373 .name = "dirty bit", 2374 }, 2375 { 2376 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 2377 .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, 2378 .name = "corrupt bit", 2379 }, 2380 { 2381 .type = QCOW2_FEAT_TYPE_COMPATIBLE, 2382 .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, 2383 .name = "lazy refcounts", 2384 }, 2385 }; 2386 2387 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, 2388 features, sizeof(features), buflen); 2389 if (ret < 0) { 2390 goto fail; 2391 } 2392 buf += ret; 2393 buflen -= ret; 2394 } 2395 2396 /* Bitmap extension */ 2397 if (s->nb_bitmaps > 0) { 2398 Qcow2BitmapHeaderExt bitmaps_header = { 2399 .nb_bitmaps = cpu_to_be32(s->nb_bitmaps), 2400 .bitmap_directory_size = 2401 cpu_to_be64(s->bitmap_directory_size), 2402 .bitmap_directory_offset = 2403 cpu_to_be64(s->bitmap_directory_offset) 2404 }; 2405 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BITMAPS, 2406 &bitmaps_header, sizeof(bitmaps_header), 2407 buflen); 2408 if (ret < 0) { 2409 goto fail; 2410 } 2411 buf += ret; 2412 buflen -= ret; 2413 } 2414 2415 /* Keep unknown header extensions */ 2416 QLIST_FOREACH(uext, &s->unknown_header_ext, next) { 2417 ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); 2418 if (ret < 0) { 2419 goto fail; 2420 } 2421 2422 buf += ret; 2423 buflen -= ret; 2424 } 2425 2426 /* End of header extensions */ 2427 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen); 2428 if (ret < 0) { 2429 goto fail; 2430 } 2431 2432 buf += ret; 2433 buflen -= ret; 2434 2435 /* Backing file name */ 2436 if (s->image_backing_file) { 2437 size_t backing_file_len = strlen(s->image_backing_file); 2438 2439 if (buflen < backing_file_len) { 2440 ret = -ENOSPC; 2441 goto fail; 2442 } 2443 2444 /* Using strncpy is ok here, since buf is not NUL-terminated. 
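         * (Clarifying note, added: the backing file name is stored on disk with an
         * explicit length in header->backing_file_size, so no terminator is needed
         * and the NUL that strncpy may omit is harmless.)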
*/ 2445 strncpy(buf, s->image_backing_file, buflen); 2446 2447 header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); 2448 header->backing_file_size = cpu_to_be32(backing_file_len); 2449 } 2450 2451 /* Write the new header */ 2452 ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size); 2453 if (ret < 0) { 2454 goto fail; 2455 } 2456 2457 ret = 0; 2458 fail: 2459 qemu_vfree(header); 2460 return ret; 2461 } 2462 2463 static int qcow2_change_backing_file(BlockDriverState *bs, 2464 const char *backing_file, const char *backing_fmt) 2465 { 2466 BDRVQcow2State *s = bs->opaque; 2467 2468 if (backing_file && strlen(backing_file) > 1023) { 2469 return -EINVAL; 2470 } 2471 2472 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 2473 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 2474 2475 g_free(s->image_backing_file); 2476 g_free(s->image_backing_format); 2477 2478 s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL; 2479 s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL; 2480 2481 return qcow2_update_header(bs); 2482 } 2483 2484 static int qcow2_crypt_method_from_format(const char *encryptfmt) 2485 { 2486 if (g_str_equal(encryptfmt, "luks")) { 2487 return QCOW_CRYPT_LUKS; 2488 } else if (g_str_equal(encryptfmt, "aes")) { 2489 return QCOW_CRYPT_AES; 2490 } else { 2491 return -EINVAL; 2492 } 2493 } 2494 2495 static int qcow2_set_up_encryption(BlockDriverState *bs, 2496 QCryptoBlockCreateOptions *cryptoopts, 2497 Error **errp) 2498 { 2499 BDRVQcow2State *s = bs->opaque; 2500 QCryptoBlock *crypto = NULL; 2501 int fmt, ret; 2502 2503 switch (cryptoopts->format) { 2504 case Q_CRYPTO_BLOCK_FORMAT_LUKS: 2505 fmt = QCOW_CRYPT_LUKS; 2506 break; 2507 case Q_CRYPTO_BLOCK_FORMAT_QCOW: 2508 fmt = QCOW_CRYPT_AES; 2509 break; 2510 default: 2511 error_setg(errp, "Crypto format not supported in qcow2"); 2512 return -EINVAL; 2513 } 2514 2515 s->crypt_method_header = fmt; 2516 2517 crypto = qcrypto_block_create(cryptoopts, "encrypt.", 2518 qcow2_crypto_hdr_init_func, 2519 qcow2_crypto_hdr_write_func, 2520 bs, errp); 2521 if (!crypto) { 2522 return -EINVAL; 2523 } 2524 2525 ret = qcow2_update_header(bs); 2526 if (ret < 0) { 2527 error_setg_errno(errp, -ret, "Could not write encryption header"); 2528 goto out; 2529 } 2530 2531 ret = 0; 2532 out: 2533 qcrypto_block_free(crypto); 2534 return ret; 2535 } 2536 2537 /** 2538 * Preallocates metadata structures for data clusters between @offset (in the 2539 * guest disk) and @new_length (which is thus generally the new guest disk 2540 * size). 2541 * 2542 * Returns: 0 on success, -errno on failure. 
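 *
 * Note (added for clarity): both callers in this file invoke this function
 * with s->lock already held, so the cluster allocations done here are
 * serialized with other qcow2 metadata updates.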
2543 */ 2544 static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset, 2545 uint64_t new_length) 2546 { 2547 uint64_t bytes; 2548 uint64_t host_offset = 0; 2549 unsigned int cur_bytes; 2550 int ret; 2551 QCowL2Meta *meta; 2552 2553 assert(offset <= new_length); 2554 bytes = new_length - offset; 2555 2556 while (bytes) { 2557 cur_bytes = MIN(bytes, INT_MAX); 2558 ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, 2559 &host_offset, &meta); 2560 if (ret < 0) { 2561 return ret; 2562 } 2563 2564 while (meta) { 2565 QCowL2Meta *next = meta->next; 2566 2567 ret = qcow2_alloc_cluster_link_l2(bs, meta); 2568 if (ret < 0) { 2569 qcow2_free_any_clusters(bs, meta->alloc_offset, 2570 meta->nb_clusters, QCOW2_DISCARD_NEVER); 2571 return ret; 2572 } 2573 2574 /* There are no dependent requests, but we need to remove our 2575 * request from the list of in-flight requests */ 2576 QLIST_REMOVE(meta, next_in_flight); 2577 2578 g_free(meta); 2579 meta = next; 2580 } 2581 2582 /* TODO Preallocate data if requested */ 2583 2584 bytes -= cur_bytes; 2585 offset += cur_bytes; 2586 } 2587 2588 /* 2589 * It is expected that the image file is large enough to actually contain 2590 * all of the allocated clusters (otherwise we get failing reads after 2591 * EOF). Extend the image to the last allocated sector. 2592 */ 2593 if (host_offset != 0) { 2594 uint8_t data = 0; 2595 ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1, 2596 &data, 1); 2597 if (ret < 0) { 2598 return ret; 2599 } 2600 } 2601 2602 return 0; 2603 } 2604 2605 /* qcow2_refcount_metadata_size: 2606 * @clusters: number of clusters to refcount (including data and L1/L2 tables) 2607 * @cluster_size: size of a cluster, in bytes 2608 * @refcount_order: refcount bits power-of-2 exponent 2609 * @generous_increase: allow for the refcount table to be 1.5x as large as it 2610 * needs to be 2611 * 2612 * Returns: Number of bytes required for refcount blocks and table metadata. 2613 */ 2614 int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, 2615 int refcount_order, bool generous_increase, 2616 uint64_t *refblock_count) 2617 { 2618 /* 2619 * Every host cluster is reference-counted, including metadata (even 2620 * refcount metadata is recursively included). 2621 * 2622 * An accurate formula for the size of refcount metadata size is difficult 2623 * to derive. An easier method of calculation is finding the fixed point 2624 * where no further refcount blocks or table clusters are required to 2625 * reference count every cluster. 
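 *
 * Worked example (illustration only, not part of the original comment):
 * with 64 KiB clusters and the default 16-bit refcounts, one refcount block
 * covers 65536 * 8 / 16 = 32768 host clusters (2 GiB of data) and one
 * refcount table cluster references 65536 / 8 = 8192 refcount blocks, so
 * the loop below reaches its fixed point after only a few iterations even
 * for very large images.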
2626 */ 2627 int64_t blocks_per_table_cluster = cluster_size / sizeof(uint64_t); 2628 int64_t refcounts_per_block = cluster_size * 8 / (1 << refcount_order); 2629 int64_t table = 0; /* number of refcount table clusters */ 2630 int64_t blocks = 0; /* number of refcount block clusters */ 2631 int64_t last; 2632 int64_t n = 0; 2633 2634 do { 2635 last = n; 2636 blocks = DIV_ROUND_UP(clusters + table + blocks, refcounts_per_block); 2637 table = DIV_ROUND_UP(blocks, blocks_per_table_cluster); 2638 n = clusters + blocks + table; 2639 2640 if (n == last && generous_increase) { 2641 clusters += DIV_ROUND_UP(table, 2); 2642 n = 0; /* force another loop */ 2643 generous_increase = false; 2644 } 2645 } while (n != last); 2646 2647 if (refblock_count) { 2648 *refblock_count = blocks; 2649 } 2650 2651 return (blocks + table) * cluster_size; 2652 } 2653 2654 /** 2655 * qcow2_calc_prealloc_size: 2656 * @total_size: virtual disk size in bytes 2657 * @cluster_size: cluster size in bytes 2658 * @refcount_order: refcount bits power-of-2 exponent 2659 * 2660 * Returns: Total number of bytes required for the fully allocated image 2661 * (including metadata). 2662 */ 2663 static int64_t qcow2_calc_prealloc_size(int64_t total_size, 2664 size_t cluster_size, 2665 int refcount_order) 2666 { 2667 int64_t meta_size = 0; 2668 uint64_t nl1e, nl2e; 2669 int64_t aligned_total_size = ROUND_UP(total_size, cluster_size); 2670 2671 /* header: 1 cluster */ 2672 meta_size += cluster_size; 2673 2674 /* total size of L2 tables */ 2675 nl2e = aligned_total_size / cluster_size; 2676 nl2e = ROUND_UP(nl2e, cluster_size / sizeof(uint64_t)); 2677 meta_size += nl2e * sizeof(uint64_t); 2678 2679 /* total size of L1 tables */ 2680 nl1e = nl2e * sizeof(uint64_t) / cluster_size; 2681 nl1e = ROUND_UP(nl1e, cluster_size / sizeof(uint64_t)); 2682 meta_size += nl1e * sizeof(uint64_t); 2683 2684 /* total size of refcount table and blocks */ 2685 meta_size += qcow2_refcount_metadata_size( 2686 (meta_size + aligned_total_size) / cluster_size, 2687 cluster_size, refcount_order, false, NULL); 2688 2689 return meta_size + aligned_total_size; 2690 } 2691 2692 static bool validate_cluster_size(size_t cluster_size, Error **errp) 2693 { 2694 int cluster_bits = ctz32(cluster_size); 2695 if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || 2696 (1 << cluster_bits) != cluster_size) 2697 { 2698 error_setg(errp, "Cluster size must be a power of two between %d and " 2699 "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); 2700 return false; 2701 } 2702 return true; 2703 } 2704 2705 static size_t qcow2_opt_get_cluster_size_del(QemuOpts *opts, Error **errp) 2706 { 2707 size_t cluster_size; 2708 2709 cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 2710 DEFAULT_CLUSTER_SIZE); 2711 if (!validate_cluster_size(cluster_size, errp)) { 2712 return 0; 2713 } 2714 return cluster_size; 2715 } 2716 2717 static int qcow2_opt_get_version_del(QemuOpts *opts, Error **errp) 2718 { 2719 char *buf; 2720 int ret; 2721 2722 buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL); 2723 if (!buf) { 2724 ret = 3; /* default */ 2725 } else if (!strcmp(buf, "0.10")) { 2726 ret = 2; 2727 } else if (!strcmp(buf, "1.1")) { 2728 ret = 3; 2729 } else { 2730 error_setg(errp, "Invalid compatibility level: '%s'", buf); 2731 ret = -EINVAL; 2732 } 2733 g_free(buf); 2734 return ret; 2735 } 2736 2737 static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version, 2738 Error **errp) 2739 { 2740 uint64_t refcount_bits; 2741 2742 
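    /* The requested width must be a power of two no larger than 64, and for
     * compat=0.10 (version 2) images it must be exactly 16 (the default);
     * both constraints are enforced by the checks below. */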
refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, 16); 2743 if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) { 2744 error_setg(errp, "Refcount width must be a power of two and may not " 2745 "exceed 64 bits"); 2746 return 0; 2747 } 2748 2749 if (version < 3 && refcount_bits != 16) { 2750 error_setg(errp, "Different refcount widths than 16 bits require " 2751 "compatibility level 1.1 or above (use compat=1.1 or " 2752 "greater)"); 2753 return 0; 2754 } 2755 2756 return refcount_bits; 2757 } 2758 2759 static int coroutine_fn 2760 qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) 2761 { 2762 BlockdevCreateOptionsQcow2 *qcow2_opts; 2763 QDict *options; 2764 2765 /* 2766 * Open the image file and write a minimal qcow2 header. 2767 * 2768 * We keep things simple and start with a zero-sized image. We also 2769 * do without refcount blocks or a L1 table for now. We'll fix the 2770 * inconsistency later. 2771 * 2772 * We do need a refcount table because growing the refcount table means 2773 * allocating two new refcount blocks - the seconds of which would be at 2774 * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file 2775 * size for any qcow2 image. 2776 */ 2777 BlockBackend *blk = NULL; 2778 BlockDriverState *bs = NULL; 2779 QCowHeader *header; 2780 size_t cluster_size; 2781 int version; 2782 int refcount_order; 2783 uint64_t* refcount_table; 2784 Error *local_err = NULL; 2785 int ret; 2786 2787 assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2); 2788 qcow2_opts = &create_options->u.qcow2; 2789 2790 bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp); 2791 if (bs == NULL) { 2792 return -EIO; 2793 } 2794 2795 /* Validate options and set default values */ 2796 if (!QEMU_IS_ALIGNED(qcow2_opts->size, BDRV_SECTOR_SIZE)) { 2797 error_setg(errp, "Image size must be a multiple of 512 bytes"); 2798 ret = -EINVAL; 2799 goto out; 2800 } 2801 2802 if (qcow2_opts->has_version) { 2803 switch (qcow2_opts->version) { 2804 case BLOCKDEV_QCOW2_VERSION_V2: 2805 version = 2; 2806 break; 2807 case BLOCKDEV_QCOW2_VERSION_V3: 2808 version = 3; 2809 break; 2810 default: 2811 g_assert_not_reached(); 2812 } 2813 } else { 2814 version = 3; 2815 } 2816 2817 if (qcow2_opts->has_cluster_size) { 2818 cluster_size = qcow2_opts->cluster_size; 2819 } else { 2820 cluster_size = DEFAULT_CLUSTER_SIZE; 2821 } 2822 2823 if (!validate_cluster_size(cluster_size, errp)) { 2824 ret = -EINVAL; 2825 goto out; 2826 } 2827 2828 if (!qcow2_opts->has_preallocation) { 2829 qcow2_opts->preallocation = PREALLOC_MODE_OFF; 2830 } 2831 if (qcow2_opts->has_backing_file && 2832 qcow2_opts->preallocation != PREALLOC_MODE_OFF) 2833 { 2834 error_setg(errp, "Backing file and preallocation cannot be used at " 2835 "the same time"); 2836 ret = -EINVAL; 2837 goto out; 2838 } 2839 if (qcow2_opts->has_backing_fmt && !qcow2_opts->has_backing_file) { 2840 error_setg(errp, "Backing format cannot be used without backing file"); 2841 ret = -EINVAL; 2842 goto out; 2843 } 2844 2845 if (!qcow2_opts->has_lazy_refcounts) { 2846 qcow2_opts->lazy_refcounts = false; 2847 } 2848 if (version < 3 && qcow2_opts->lazy_refcounts) { 2849 error_setg(errp, "Lazy refcounts only supported with compatibility " 2850 "level 1.1 and above (use version=v3 or greater)"); 2851 ret = -EINVAL; 2852 goto out; 2853 } 2854 2855 if (!qcow2_opts->has_refcount_bits) { 2856 qcow2_opts->refcount_bits = 16; 2857 } 2858 if (qcow2_opts->refcount_bits > 64 || 2859 !is_power_of_2(qcow2_opts->refcount_bits)) 2860 { 2861 error_setg(errp, 
"Refcount width must be a power of two and may not " 2862 "exceed 64 bits"); 2863 ret = -EINVAL; 2864 goto out; 2865 } 2866 if (version < 3 && qcow2_opts->refcount_bits != 16) { 2867 error_setg(errp, "Different refcount widths than 16 bits require " 2868 "compatibility level 1.1 or above (use version=v3 or " 2869 "greater)"); 2870 ret = -EINVAL; 2871 goto out; 2872 } 2873 refcount_order = ctz32(qcow2_opts->refcount_bits); 2874 2875 2876 /* Create BlockBackend to write to the image */ 2877 blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); 2878 ret = blk_insert_bs(blk, bs, errp); 2879 if (ret < 0) { 2880 goto out; 2881 } 2882 blk_set_allow_write_beyond_eof(blk, true); 2883 2884 /* Clear the protocol layer and preallocate it if necessary */ 2885 ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp); 2886 if (ret < 0) { 2887 goto out; 2888 } 2889 2890 if (qcow2_opts->preallocation == PREALLOC_MODE_FULL || 2891 qcow2_opts->preallocation == PREALLOC_MODE_FALLOC) 2892 { 2893 int64_t prealloc_size = 2894 qcow2_calc_prealloc_size(qcow2_opts->size, cluster_size, 2895 refcount_order); 2896 2897 ret = blk_truncate(blk, prealloc_size, qcow2_opts->preallocation, errp); 2898 if (ret < 0) { 2899 goto out; 2900 } 2901 } 2902 2903 /* Write the header */ 2904 QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); 2905 header = g_malloc0(cluster_size); 2906 *header = (QCowHeader) { 2907 .magic = cpu_to_be32(QCOW_MAGIC), 2908 .version = cpu_to_be32(version), 2909 .cluster_bits = cpu_to_be32(ctz32(cluster_size)), 2910 .size = cpu_to_be64(0), 2911 .l1_table_offset = cpu_to_be64(0), 2912 .l1_size = cpu_to_be32(0), 2913 .refcount_table_offset = cpu_to_be64(cluster_size), 2914 .refcount_table_clusters = cpu_to_be32(1), 2915 .refcount_order = cpu_to_be32(refcount_order), 2916 .header_length = cpu_to_be32(sizeof(*header)), 2917 }; 2918 2919 /* We'll update this to correct value later */ 2920 header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 2921 2922 if (qcow2_opts->lazy_refcounts) { 2923 header->compatible_features |= 2924 cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); 2925 } 2926 2927 ret = blk_pwrite(blk, 0, header, cluster_size, 0); 2928 g_free(header); 2929 if (ret < 0) { 2930 error_setg_errno(errp, -ret, "Could not write qcow2 header"); 2931 goto out; 2932 } 2933 2934 /* Write a refcount table with one refcount block */ 2935 refcount_table = g_malloc0(2 * cluster_size); 2936 refcount_table[0] = cpu_to_be64(2 * cluster_size); 2937 ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0); 2938 g_free(refcount_table); 2939 2940 if (ret < 0) { 2941 error_setg_errno(errp, -ret, "Could not write refcount table"); 2942 goto out; 2943 } 2944 2945 blk_unref(blk); 2946 blk = NULL; 2947 2948 /* 2949 * And now open the image and make it consistent first (i.e. 
increase the 2950 * refcount of the cluster that is occupied by the header and the refcount 2951 * table) 2952 */ 2953 options = qdict_new(); 2954 qdict_put_str(options, "driver", "qcow2"); 2955 qdict_put_str(options, "file", bs->node_name); 2956 blk = blk_new_open(NULL, NULL, options, 2957 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, 2958 &local_err); 2959 if (blk == NULL) { 2960 error_propagate(errp, local_err); 2961 ret = -EIO; 2962 goto out; 2963 } 2964 2965 ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); 2966 if (ret < 0) { 2967 error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " 2968 "header and refcount table"); 2969 goto out; 2970 2971 } else if (ret != 0) { 2972 error_report("Huh, first cluster in empty image is already in use?"); 2973 abort(); 2974 } 2975 2976 /* Create a full header (including things like feature table) */ 2977 ret = qcow2_update_header(blk_bs(blk)); 2978 if (ret < 0) { 2979 error_setg_errno(errp, -ret, "Could not update qcow2 header"); 2980 goto out; 2981 } 2982 2983 /* Okay, now that we have a valid image, let's give it the right size */ 2984 ret = blk_truncate(blk, qcow2_opts->size, PREALLOC_MODE_OFF, errp); 2985 if (ret < 0) { 2986 error_prepend(errp, "Could not resize image: "); 2987 goto out; 2988 } 2989 2990 /* Want a backing file? There you go.*/ 2991 if (qcow2_opts->has_backing_file) { 2992 const char *backing_format = NULL; 2993 2994 if (qcow2_opts->has_backing_fmt) { 2995 backing_format = BlockdevDriver_str(qcow2_opts->backing_fmt); 2996 } 2997 2998 ret = bdrv_change_backing_file(blk_bs(blk), qcow2_opts->backing_file, 2999 backing_format); 3000 if (ret < 0) { 3001 error_setg_errno(errp, -ret, "Could not assign backing file '%s' " 3002 "with format '%s'", qcow2_opts->backing_file, 3003 backing_format); 3004 goto out; 3005 } 3006 } 3007 3008 /* Want encryption? There you go. */ 3009 if (qcow2_opts->has_encrypt) { 3010 ret = qcow2_set_up_encryption(blk_bs(blk), qcow2_opts->encrypt, errp); 3011 if (ret < 0) { 3012 goto out; 3013 } 3014 } 3015 3016 /* And if we're supposed to preallocate metadata, do that now */ 3017 if (qcow2_opts->preallocation != PREALLOC_MODE_OFF) { 3018 BDRVQcow2State *s = blk_bs(blk)->opaque; 3019 qemu_co_mutex_lock(&s->lock); 3020 ret = preallocate_co(blk_bs(blk), 0, qcow2_opts->size); 3021 qemu_co_mutex_unlock(&s->lock); 3022 3023 if (ret < 0) { 3024 error_setg_errno(errp, -ret, "Could not preallocate metadata"); 3025 goto out; 3026 } 3027 } 3028 3029 blk_unref(blk); 3030 blk = NULL; 3031 3032 /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. 3033 * Using BDRV_O_NO_IO, since encryption is now setup we don't want to 3034 * have to setup decryption context. We're not doing any I/O on the top 3035 * level BlockDriverState, only lower layers, where BDRV_O_NO_IO does 3036 * not have effect. 
3037 */ 3038 options = qdict_new(); 3039 qdict_put_str(options, "driver", "qcow2"); 3040 qdict_put_str(options, "file", bs->node_name); 3041 blk = blk_new_open(NULL, NULL, options, 3042 BDRV_O_RDWR | BDRV_O_NO_BACKING | BDRV_O_NO_IO, 3043 &local_err); 3044 if (blk == NULL) { 3045 error_propagate(errp, local_err); 3046 ret = -EIO; 3047 goto out; 3048 } 3049 3050 ret = 0; 3051 out: 3052 blk_unref(blk); 3053 bdrv_unref(bs); 3054 return ret; 3055 } 3056 3057 static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts, 3058 Error **errp) 3059 { 3060 BlockdevCreateOptions *create_options = NULL; 3061 QDict *qdict; 3062 Visitor *v; 3063 BlockDriverState *bs = NULL; 3064 Error *local_err = NULL; 3065 const char *val; 3066 int ret; 3067 3068 /* Only the keyval visitor supports the dotted syntax needed for 3069 * encryption, so go through a QDict before getting a QAPI type. Ignore 3070 * options meant for the protocol layer so that the visitor doesn't 3071 * complain. */ 3072 qdict = qemu_opts_to_qdict_filtered(opts, NULL, bdrv_qcow2.create_opts, 3073 true); 3074 3075 /* Handle encryption options */ 3076 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT); 3077 if (val && !strcmp(val, "on")) { 3078 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow"); 3079 } else if (val && !strcmp(val, "off")) { 3080 qdict_del(qdict, BLOCK_OPT_ENCRYPT); 3081 } 3082 3083 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT); 3084 if (val && !strcmp(val, "aes")) { 3085 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow"); 3086 } 3087 3088 /* Convert compat=0.10/1.1 into compat=v2/v3, to be renamed into 3089 * version=v2/v3 below. */ 3090 val = qdict_get_try_str(qdict, BLOCK_OPT_COMPAT_LEVEL); 3091 if (val && !strcmp(val, "0.10")) { 3092 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v2"); 3093 } else if (val && !strcmp(val, "1.1")) { 3094 qdict_put_str(qdict, BLOCK_OPT_COMPAT_LEVEL, "v3"); 3095 } 3096 3097 /* Change legacy command line options into QMP ones */ 3098 static const QDictRenames opt_renames[] = { 3099 { BLOCK_OPT_BACKING_FILE, "backing-file" }, 3100 { BLOCK_OPT_BACKING_FMT, "backing-fmt" }, 3101 { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" }, 3102 { BLOCK_OPT_LAZY_REFCOUNTS, "lazy-refcounts" }, 3103 { BLOCK_OPT_REFCOUNT_BITS, "refcount-bits" }, 3104 { BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT }, 3105 { BLOCK_OPT_COMPAT_LEVEL, "version" }, 3106 { NULL, NULL }, 3107 }; 3108 3109 if (!qdict_rename_keys(qdict, opt_renames, errp)) { 3110 ret = -EINVAL; 3111 goto finish; 3112 } 3113 3114 /* Create and open the file (protocol layer) */ 3115 ret = bdrv_create_file(filename, opts, errp); 3116 if (ret < 0) { 3117 goto finish; 3118 } 3119 3120 bs = bdrv_open(filename, NULL, NULL, 3121 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); 3122 if (bs == NULL) { 3123 ret = -EIO; 3124 goto finish; 3125 } 3126 3127 /* Set 'driver' and 'node' options */ 3128 qdict_put_str(qdict, "driver", "qcow2"); 3129 qdict_put_str(qdict, "file", bs->node_name); 3130 3131 /* Now get the QAPI type BlockdevCreateOptions */ 3132 v = qobject_input_visitor_new_flat_confused(qdict, errp); 3133 if (!v) { 3134 ret = -EINVAL; 3135 goto finish; 3136 } 3137 3138 visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err); 3139 visit_free(v); 3140 3141 if (local_err) { 3142 error_propagate(errp, local_err); 3143 ret = -EINVAL; 3144 goto finish; 3145 } 3146 3147 /* Silently round up size */ 3148 create_options->u.qcow2.size = ROUND_UP(create_options->u.qcow2.size, 3149 BDRV_SECTOR_SIZE); 3150 3151 /* Create the qcow2 
image (format layer) */ 3152 ret = qcow2_co_create(create_options, errp); 3153 if (ret < 0) { 3154 goto finish; 3155 } 3156 3157 ret = 0; 3158 finish: 3159 qobject_unref(qdict); 3160 bdrv_unref(bs); 3161 qapi_free_BlockdevCreateOptions(create_options); 3162 return ret; 3163 } 3164 3165 3166 static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) 3167 { 3168 int64_t nr; 3169 int res; 3170 3171 /* Clamp to image length, before checking status of underlying sectors */ 3172 if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { 3173 bytes = bs->total_sectors * BDRV_SECTOR_SIZE - offset; 3174 } 3175 3176 if (!bytes) { 3177 return true; 3178 } 3179 res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL); 3180 return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes; 3181 } 3182 3183 static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, 3184 int64_t offset, int bytes, BdrvRequestFlags flags) 3185 { 3186 int ret; 3187 BDRVQcow2State *s = bs->opaque; 3188 3189 uint32_t head = offset % s->cluster_size; 3190 uint32_t tail = (offset + bytes) % s->cluster_size; 3191 3192 trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, bytes); 3193 if (offset + bytes == bs->total_sectors * BDRV_SECTOR_SIZE) { 3194 tail = 0; 3195 } 3196 3197 if (head || tail) { 3198 uint64_t off; 3199 unsigned int nr; 3200 3201 assert(head + bytes <= s->cluster_size); 3202 3203 /* check whether remainder of cluster already reads as zero */ 3204 if (!(is_zero(bs, offset - head, head) && 3205 is_zero(bs, offset + bytes, 3206 tail ? s->cluster_size - tail : 0))) { 3207 return -ENOTSUP; 3208 } 3209 3210 qemu_co_mutex_lock(&s->lock); 3211 /* We can have new write after previous check */ 3212 offset = QEMU_ALIGN_DOWN(offset, s->cluster_size); 3213 bytes = s->cluster_size; 3214 nr = s->cluster_size; 3215 ret = qcow2_get_cluster_offset(bs, offset, &nr, &off); 3216 if (ret != QCOW2_CLUSTER_UNALLOCATED && 3217 ret != QCOW2_CLUSTER_ZERO_PLAIN && 3218 ret != QCOW2_CLUSTER_ZERO_ALLOC) { 3219 qemu_co_mutex_unlock(&s->lock); 3220 return -ENOTSUP; 3221 } 3222 } else { 3223 qemu_co_mutex_lock(&s->lock); 3224 } 3225 3226 trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, bytes); 3227 3228 /* Whatever is left can use real zero clusters */ 3229 ret = qcow2_cluster_zeroize(bs, offset, bytes, flags); 3230 qemu_co_mutex_unlock(&s->lock); 3231 3232 return ret; 3233 } 3234 3235 static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, 3236 int64_t offset, int bytes) 3237 { 3238 int ret; 3239 BDRVQcow2State *s = bs->opaque; 3240 3241 if (!QEMU_IS_ALIGNED(offset | bytes, s->cluster_size)) { 3242 assert(bytes < s->cluster_size); 3243 /* Ignore partial clusters, except for the special case of the 3244 * complete partial cluster at the end of an unaligned file */ 3245 if (!QEMU_IS_ALIGNED(offset, s->cluster_size) || 3246 offset + bytes != bs->total_sectors * BDRV_SECTOR_SIZE) { 3247 return -ENOTSUP; 3248 } 3249 } 3250 3251 qemu_co_mutex_lock(&s->lock); 3252 ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST, 3253 false); 3254 qemu_co_mutex_unlock(&s->lock); 3255 return ret; 3256 } 3257 3258 static int coroutine_fn 3259 qcow2_co_copy_range_from(BlockDriverState *bs, 3260 BdrvChild *src, uint64_t src_offset, 3261 BdrvChild *dst, uint64_t dst_offset, 3262 uint64_t bytes, BdrvRequestFlags read_flags, 3263 BdrvRequestFlags write_flags) 3264 { 3265 BDRVQcow2State *s = bs->opaque; 3266 int ret; 3267 unsigned int cur_bytes; /* number of bytes in current iteration */ 3268 
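    /* Per chunk, @child and @copy_offset (set in the loop below) describe
     * where the source data actually lives: bs->file for normal clusters,
     * the backing child for clusters unallocated in this image, or no child
     * at all for zero clusters and for reads past a short backing file,
     * which become BDRV_REQ_ZERO_WRITE on the destination instead
     * (compressed clusters are not supported and fail with -ENOTSUP). */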
BdrvChild *child = NULL; 3269 BdrvRequestFlags cur_write_flags; 3270 3271 assert(!bs->encrypted); 3272 qemu_co_mutex_lock(&s->lock); 3273 3274 while (bytes != 0) { 3275 uint64_t copy_offset = 0; 3276 /* prepare next request */ 3277 cur_bytes = MIN(bytes, INT_MAX); 3278 cur_write_flags = write_flags; 3279 3280 ret = qcow2_get_cluster_offset(bs, src_offset, &cur_bytes, ©_offset); 3281 if (ret < 0) { 3282 goto out; 3283 } 3284 3285 switch (ret) { 3286 case QCOW2_CLUSTER_UNALLOCATED: 3287 if (bs->backing && bs->backing->bs) { 3288 int64_t backing_length = bdrv_getlength(bs->backing->bs); 3289 if (src_offset >= backing_length) { 3290 cur_write_flags |= BDRV_REQ_ZERO_WRITE; 3291 } else { 3292 child = bs->backing; 3293 cur_bytes = MIN(cur_bytes, backing_length - src_offset); 3294 copy_offset = src_offset; 3295 } 3296 } else { 3297 cur_write_flags |= BDRV_REQ_ZERO_WRITE; 3298 } 3299 break; 3300 3301 case QCOW2_CLUSTER_ZERO_PLAIN: 3302 case QCOW2_CLUSTER_ZERO_ALLOC: 3303 cur_write_flags |= BDRV_REQ_ZERO_WRITE; 3304 break; 3305 3306 case QCOW2_CLUSTER_COMPRESSED: 3307 ret = -ENOTSUP; 3308 goto out; 3309 3310 case QCOW2_CLUSTER_NORMAL: 3311 child = bs->file; 3312 copy_offset += offset_into_cluster(s, src_offset); 3313 if ((copy_offset & 511) != 0) { 3314 ret = -EIO; 3315 goto out; 3316 } 3317 break; 3318 3319 default: 3320 abort(); 3321 } 3322 qemu_co_mutex_unlock(&s->lock); 3323 ret = bdrv_co_copy_range_from(child, 3324 copy_offset, 3325 dst, dst_offset, 3326 cur_bytes, read_flags, cur_write_flags); 3327 qemu_co_mutex_lock(&s->lock); 3328 if (ret < 0) { 3329 goto out; 3330 } 3331 3332 bytes -= cur_bytes; 3333 src_offset += cur_bytes; 3334 dst_offset += cur_bytes; 3335 } 3336 ret = 0; 3337 3338 out: 3339 qemu_co_mutex_unlock(&s->lock); 3340 return ret; 3341 } 3342 3343 static int coroutine_fn 3344 qcow2_co_copy_range_to(BlockDriverState *bs, 3345 BdrvChild *src, uint64_t src_offset, 3346 BdrvChild *dst, uint64_t dst_offset, 3347 uint64_t bytes, BdrvRequestFlags read_flags, 3348 BdrvRequestFlags write_flags) 3349 { 3350 BDRVQcow2State *s = bs->opaque; 3351 int offset_in_cluster; 3352 int ret; 3353 unsigned int cur_bytes; /* number of sectors in current iteration */ 3354 uint64_t cluster_offset; 3355 QCowL2Meta *l2meta = NULL; 3356 3357 assert(!bs->encrypted); 3358 s->cluster_cache_offset = -1; /* disable compressed cache */ 3359 3360 qemu_co_mutex_lock(&s->lock); 3361 3362 while (bytes != 0) { 3363 3364 l2meta = NULL; 3365 3366 offset_in_cluster = offset_into_cluster(s, dst_offset); 3367 cur_bytes = MIN(bytes, INT_MAX); 3368 3369 /* TODO: 3370 * If src->bs == dst->bs, we could simply copy by incrementing 3371 * the refcnt, without copying user data. 3372 * Or if src->bs == dst->bs->backing->bs, we could copy by discarding. 
*/ 3373 ret = qcow2_alloc_cluster_offset(bs, dst_offset, &cur_bytes, 3374 &cluster_offset, &l2meta); 3375 if (ret < 0) { 3376 goto fail; 3377 } 3378 3379 assert((cluster_offset & 511) == 0); 3380 3381 ret = qcow2_pre_write_overlap_check(bs, 0, 3382 cluster_offset + offset_in_cluster, cur_bytes); 3383 if (ret < 0) { 3384 goto fail; 3385 } 3386 3387 qemu_co_mutex_unlock(&s->lock); 3388 ret = bdrv_co_copy_range_to(src, src_offset, 3389 bs->file, 3390 cluster_offset + offset_in_cluster, 3391 cur_bytes, read_flags, write_flags); 3392 qemu_co_mutex_lock(&s->lock); 3393 if (ret < 0) { 3394 goto fail; 3395 } 3396 3397 ret = qcow2_handle_l2meta(bs, &l2meta, true); 3398 if (ret) { 3399 goto fail; 3400 } 3401 3402 bytes -= cur_bytes; 3403 src_offset += cur_bytes; 3404 dst_offset += cur_bytes; 3405 } 3406 ret = 0; 3407 3408 fail: 3409 qcow2_handle_l2meta(bs, &l2meta, false); 3410 3411 qemu_co_mutex_unlock(&s->lock); 3412 3413 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); 3414 3415 return ret; 3416 } 3417 3418 static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset, 3419 PreallocMode prealloc, Error **errp) 3420 { 3421 BDRVQcow2State *s = bs->opaque; 3422 uint64_t old_length; 3423 int64_t new_l1_size; 3424 int ret; 3425 3426 if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_METADATA && 3427 prealloc != PREALLOC_MODE_FALLOC && prealloc != PREALLOC_MODE_FULL) 3428 { 3429 error_setg(errp, "Unsupported preallocation mode '%s'", 3430 PreallocMode_str(prealloc)); 3431 return -ENOTSUP; 3432 } 3433 3434 if (offset & 511) { 3435 error_setg(errp, "The new size must be a multiple of 512"); 3436 return -EINVAL; 3437 } 3438 3439 qemu_co_mutex_lock(&s->lock); 3440 3441 /* cannot proceed if image has snapshots */ 3442 if (s->nb_snapshots) { 3443 error_setg(errp, "Can't resize an image which has snapshots"); 3444 ret = -ENOTSUP; 3445 goto fail; 3446 } 3447 3448 /* cannot proceed if image has bitmaps */ 3449 if (s->nb_bitmaps) { 3450 /* TODO: resize bitmaps in the image */ 3451 error_setg(errp, "Can't resize an image which has bitmaps"); 3452 ret = -ENOTSUP; 3453 goto fail; 3454 } 3455 3456 old_length = bs->total_sectors * 512; 3457 new_l1_size = size_to_l1(s, offset); 3458 3459 if (offset < old_length) { 3460 int64_t last_cluster, old_file_size; 3461 if (prealloc != PREALLOC_MODE_OFF) { 3462 error_setg(errp, 3463 "Preallocation can't be used for shrinking an image"); 3464 ret = -EINVAL; 3465 goto fail; 3466 } 3467 3468 ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size), 3469 old_length - ROUND_UP(offset, 3470 s->cluster_size), 3471 QCOW2_DISCARD_ALWAYS, true); 3472 if (ret < 0) { 3473 error_setg_errno(errp, -ret, "Failed to discard cropped clusters"); 3474 goto fail; 3475 } 3476 3477 ret = qcow2_shrink_l1_table(bs, new_l1_size); 3478 if (ret < 0) { 3479 error_setg_errno(errp, -ret, 3480 "Failed to reduce the number of L2 tables"); 3481 goto fail; 3482 } 3483 3484 ret = qcow2_shrink_reftable(bs); 3485 if (ret < 0) { 3486 error_setg_errno(errp, -ret, 3487 "Failed to discard unused refblocks"); 3488 goto fail; 3489 } 3490 3491 old_file_size = bdrv_getlength(bs->file->bs); 3492 if (old_file_size < 0) { 3493 error_setg_errno(errp, -old_file_size, 3494 "Failed to inquire current file length"); 3495 ret = old_file_size; 3496 goto fail; 3497 } 3498 last_cluster = qcow2_get_last_cluster(bs, old_file_size); 3499 if (last_cluster < 0) { 3500 error_setg_errno(errp, -last_cluster, 3501 "Failed to find the last cluster"); 3502 ret = last_cluster; 3503 goto fail; 3504 } 
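        /* If discarding clusters has freed space at the end of the image
         * file, try to drop that tail as well.  This is only an
         * optimization, so a failure is reported as a warning below instead
         * of failing the resize. */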
3505 if ((last_cluster + 1) * s->cluster_size < old_file_size) { 3506 Error *local_err = NULL; 3507 3508 bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size, 3509 PREALLOC_MODE_OFF, &local_err); 3510 if (local_err) { 3511 warn_reportf_err(local_err, 3512 "Failed to truncate the tail of the image: "); 3513 } 3514 } 3515 } else { 3516 ret = qcow2_grow_l1_table(bs, new_l1_size, true); 3517 if (ret < 0) { 3518 error_setg_errno(errp, -ret, "Failed to grow the L1 table"); 3519 goto fail; 3520 } 3521 } 3522 3523 switch (prealloc) { 3524 case PREALLOC_MODE_OFF: 3525 break; 3526 3527 case PREALLOC_MODE_METADATA: 3528 ret = preallocate_co(bs, old_length, offset); 3529 if (ret < 0) { 3530 error_setg_errno(errp, -ret, "Preallocation failed"); 3531 goto fail; 3532 } 3533 break; 3534 3535 case PREALLOC_MODE_FALLOC: 3536 case PREALLOC_MODE_FULL: 3537 { 3538 int64_t allocation_start, host_offset, guest_offset; 3539 int64_t clusters_allocated; 3540 int64_t old_file_size, new_file_size; 3541 uint64_t nb_new_data_clusters, nb_new_l2_tables; 3542 3543 old_file_size = bdrv_getlength(bs->file->bs); 3544 if (old_file_size < 0) { 3545 error_setg_errno(errp, -old_file_size, 3546 "Failed to inquire current file length"); 3547 ret = old_file_size; 3548 goto fail; 3549 } 3550 old_file_size = ROUND_UP(old_file_size, s->cluster_size); 3551 3552 nb_new_data_clusters = DIV_ROUND_UP(offset - old_length, 3553 s->cluster_size); 3554 3555 /* This is an overestimation; we will not actually allocate space for 3556 * these in the file but just make sure the new refcount structures are 3557 * able to cover them so we will not have to allocate new refblocks 3558 * while entering the data blocks in the potentially new L2 tables. 3559 * (We do not actually care where the L2 tables are placed. Maybe they 3560 * are already allocated or they can be placed somewhere before 3561 * @old_file_size. It does not matter because they will be fully 3562 * allocated automatically, so they do not need to be covered by the 3563 * preallocation. All that matters is that we will not have to allocate 3564 * new refcount structures for them.) 
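         *
         * For example (illustration only): growing a 64 KiB-cluster image
         * by 10 GiB adds 163840 data clusters; with 8192 L2 entries per
         * table that is 20 full L2 tables, plus the one added below for a
         * potential head/tail, i.e. at most 21 L2 clusters to cover.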
*/ 3565 nb_new_l2_tables = DIV_ROUND_UP(nb_new_data_clusters, 3566 s->cluster_size / sizeof(uint64_t)); 3567 /* The cluster range may not be aligned to L2 boundaries, so add one L2 3568 * table for a potential head/tail */ 3569 nb_new_l2_tables++; 3570 3571 allocation_start = qcow2_refcount_area(bs, old_file_size, 3572 nb_new_data_clusters + 3573 nb_new_l2_tables, 3574 true, 0, 0); 3575 if (allocation_start < 0) { 3576 error_setg_errno(errp, -allocation_start, 3577 "Failed to resize refcount structures"); 3578 ret = allocation_start; 3579 goto fail; 3580 } 3581 3582 clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start, 3583 nb_new_data_clusters); 3584 if (clusters_allocated < 0) { 3585 error_setg_errno(errp, -clusters_allocated, 3586 "Failed to allocate data clusters"); 3587 ret = clusters_allocated; 3588 goto fail; 3589 } 3590 3591 assert(clusters_allocated == nb_new_data_clusters); 3592 3593 /* Allocate the data area */ 3594 new_file_size = allocation_start + 3595 nb_new_data_clusters * s->cluster_size; 3596 ret = bdrv_co_truncate(bs->file, new_file_size, prealloc, errp); 3597 if (ret < 0) { 3598 error_prepend(errp, "Failed to resize underlying file: "); 3599 qcow2_free_clusters(bs, allocation_start, 3600 nb_new_data_clusters * s->cluster_size, 3601 QCOW2_DISCARD_OTHER); 3602 goto fail; 3603 } 3604 3605 /* Create the necessary L2 entries */ 3606 host_offset = allocation_start; 3607 guest_offset = old_length; 3608 while (nb_new_data_clusters) { 3609 int64_t nb_clusters = MIN( 3610 nb_new_data_clusters, 3611 s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset)); 3612 QCowL2Meta allocation = { 3613 .offset = guest_offset, 3614 .alloc_offset = host_offset, 3615 .nb_clusters = nb_clusters, 3616 }; 3617 qemu_co_queue_init(&allocation.dependent_requests); 3618 3619 ret = qcow2_alloc_cluster_link_l2(bs, &allocation); 3620 if (ret < 0) { 3621 error_setg_errno(errp, -ret, "Failed to update L2 tables"); 3622 qcow2_free_clusters(bs, host_offset, 3623 nb_new_data_clusters * s->cluster_size, 3624 QCOW2_DISCARD_OTHER); 3625 goto fail; 3626 } 3627 3628 guest_offset += nb_clusters * s->cluster_size; 3629 host_offset += nb_clusters * s->cluster_size; 3630 nb_new_data_clusters -= nb_clusters; 3631 } 3632 break; 3633 } 3634 3635 default: 3636 g_assert_not_reached(); 3637 } 3638 3639 if (prealloc != PREALLOC_MODE_OFF) { 3640 /* Flush metadata before actually changing the image size */ 3641 ret = qcow2_write_caches(bs); 3642 if (ret < 0) { 3643 error_setg_errno(errp, -ret, 3644 "Failed to flush the preallocated area to disk"); 3645 goto fail; 3646 } 3647 } 3648 3649 /* write updated header.size */ 3650 offset = cpu_to_be64(offset); 3651 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), 3652 &offset, sizeof(uint64_t)); 3653 if (ret < 0) { 3654 error_setg_errno(errp, -ret, "Failed to update the image size"); 3655 goto fail; 3656 } 3657 3658 s->l1_vm_state_index = new_l1_size; 3659 ret = 0; 3660 fail: 3661 qemu_co_mutex_unlock(&s->lock); 3662 return ret; 3663 } 3664 3665 /* 3666 * qcow2_compress() 3667 * 3668 * @dest - destination buffer, at least of @size-1 bytes 3669 * @src - source buffer, @size bytes 3670 * 3671 * Returns: compressed size on success 3672 * -1 if compression is inefficient 3673 * -2 on any other error 3674 */ 3675 static ssize_t qcow2_compress(void *dest, const void *src, size_t size) 3676 { 3677 ssize_t ret; 3678 z_stream strm; 3679 3680 /* best compression, small window, no zlib header */ 3681 memset(&strm, 0, sizeof(strm)); 3682 ret = deflateInit2(&strm, 
Z_DEFAULT_COMPRESSION, Z_DEFLATED, 3683 -12, 9, Z_DEFAULT_STRATEGY); 3684 if (ret != 0) { 3685 return -2; 3686 } 3687 3688 /* strm.next_in is not const in old zlib versions, such as those used on 3689 * OpenBSD/NetBSD, so cast the const away */ 3690 strm.avail_in = size; 3691 strm.next_in = (void *) src; 3692 strm.avail_out = size - 1; 3693 strm.next_out = dest; 3694 3695 ret = deflate(&strm, Z_FINISH); 3696 if (ret == Z_STREAM_END) { 3697 ret = size - 1 - strm.avail_out; 3698 } else { 3699 ret = (ret == Z_OK ? -1 : -2); 3700 } 3701 3702 deflateEnd(&strm); 3703 3704 return ret; 3705 } 3706 3707 #define MAX_COMPRESS_THREADS 4 3708 3709 typedef struct Qcow2CompressData { 3710 void *dest; 3711 const void *src; 3712 size_t size; 3713 ssize_t ret; 3714 } Qcow2CompressData; 3715 3716 static int qcow2_compress_pool_func(void *opaque) 3717 { 3718 Qcow2CompressData *data = opaque; 3719 3720 data->ret = qcow2_compress(data->dest, data->src, data->size); 3721 3722 return 0; 3723 } 3724 3725 static void qcow2_compress_complete(void *opaque, int ret) 3726 { 3727 qemu_coroutine_enter(opaque); 3728 } 3729 3730 /* See qcow2_compress definition for parameters description */ 3731 static ssize_t qcow2_co_compress(BlockDriverState *bs, 3732 void *dest, const void *src, size_t size) 3733 { 3734 BDRVQcow2State *s = bs->opaque; 3735 BlockAIOCB *acb; 3736 ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); 3737 Qcow2CompressData arg = { 3738 .dest = dest, 3739 .src = src, 3740 .size = size, 3741 }; 3742 3743 while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) { 3744 qemu_co_queue_wait(&s->compress_wait_queue, NULL); 3745 } 3746 3747 s->nb_compress_threads++; 3748 acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg, 3749 qcow2_compress_complete, 3750 qemu_coroutine_self()); 3751 3752 if (!acb) { 3753 s->nb_compress_threads--; 3754 return -EINVAL; 3755 } 3756 qemu_coroutine_yield(); 3757 s->nb_compress_threads--; 3758 qemu_co_queue_next(&s->compress_wait_queue); 3759 3760 return arg.ret; 3761 } 3762 3763 /* XXX: put compressed sectors first, then all the cluster aligned 3764 tables to avoid losing bytes in alignment */ 3765 static coroutine_fn int 3766 qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, 3767 uint64_t bytes, QEMUIOVector *qiov) 3768 { 3769 BDRVQcow2State *s = bs->opaque; 3770 QEMUIOVector hd_qiov; 3771 struct iovec iov; 3772 int ret; 3773 size_t out_len; 3774 uint8_t *buf, *out_buf; 3775 int64_t cluster_offset; 3776 3777 if (bytes == 0) { 3778 /* align end of file to a sector boundary to ease reading with 3779 sector based I/Os */ 3780 cluster_offset = bdrv_getlength(bs->file->bs); 3781 if (cluster_offset < 0) { 3782 return cluster_offset; 3783 } 3784 return bdrv_co_truncate(bs->file, cluster_offset, PREALLOC_MODE_OFF, 3785 NULL); 3786 } 3787 3788 if (offset_into_cluster(s, offset)) { 3789 return -EINVAL; 3790 } 3791 3792 buf = qemu_blockalign(bs, s->cluster_size); 3793 if (bytes != s->cluster_size) { 3794 if (bytes > s->cluster_size || 3795 offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS) 3796 { 3797 qemu_vfree(buf); 3798 return -EINVAL; 3799 } 3800 /* Zero-pad last write if image size is not cluster aligned */ 3801 memset(buf + bytes, 0, s->cluster_size - bytes); 3802 } 3803 qemu_iovec_to_buf(qiov, 0, buf, bytes); 3804 3805 out_buf = g_malloc(s->cluster_size); 3806 3807 out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size); 3808 if (out_len == -2) { 3809 ret = -EINVAL; 3810 goto fail; 3811 } else if (out_len == -1) { 3812 /* could not 
compress: write normal cluster */ 3813 ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0); 3814 if (ret < 0) { 3815 goto fail; 3816 } 3817 goto success; 3818 } 3819 3820 qemu_co_mutex_lock(&s->lock); 3821 cluster_offset = 3822 qcow2_alloc_compressed_cluster_offset(bs, offset, out_len); 3823 if (!cluster_offset) { 3824 qemu_co_mutex_unlock(&s->lock); 3825 ret = -EIO; 3826 goto fail; 3827 } 3828 cluster_offset &= s->cluster_offset_mask; 3829 3830 ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len); 3831 qemu_co_mutex_unlock(&s->lock); 3832 if (ret < 0) { 3833 goto fail; 3834 } 3835 3836 iov = (struct iovec) { 3837 .iov_base = out_buf, 3838 .iov_len = out_len, 3839 }; 3840 qemu_iovec_init_external(&hd_qiov, &iov, 1); 3841 3842 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); 3843 ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0); 3844 if (ret < 0) { 3845 goto fail; 3846 } 3847 success: 3848 ret = 0; 3849 fail: 3850 qemu_vfree(buf); 3851 g_free(out_buf); 3852 return ret; 3853 } 3854 3855 static int make_completely_empty(BlockDriverState *bs) 3856 { 3857 BDRVQcow2State *s = bs->opaque; 3858 Error *local_err = NULL; 3859 int ret, l1_clusters; 3860 int64_t offset; 3861 uint64_t *new_reftable = NULL; 3862 uint64_t rt_entry, l1_size2; 3863 struct { 3864 uint64_t l1_offset; 3865 uint64_t reftable_offset; 3866 uint32_t reftable_clusters; 3867 } QEMU_PACKED l1_ofs_rt_ofs_cls; 3868 3869 ret = qcow2_cache_empty(bs, s->l2_table_cache); 3870 if (ret < 0) { 3871 goto fail; 3872 } 3873 3874 ret = qcow2_cache_empty(bs, s->refcount_block_cache); 3875 if (ret < 0) { 3876 goto fail; 3877 } 3878 3879 /* Refcounts will be broken utterly */ 3880 ret = qcow2_mark_dirty(bs); 3881 if (ret < 0) { 3882 goto fail; 3883 } 3884 3885 BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 3886 3887 l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); 3888 l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t); 3889 3890 /* After this call, neither the in-memory nor the on-disk refcount 3891 * information accurately describe the actual references */ 3892 3893 ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset, 3894 l1_clusters * s->cluster_size, 0); 3895 if (ret < 0) { 3896 goto fail_broken_refcounts; 3897 } 3898 memset(s->l1_table, 0, l1_size2); 3899 3900 BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE); 3901 3902 /* Overwrite enough clusters at the beginning of the sectors to place 3903 * the refcount table, a refcount block and the L1 table in; this may 3904 * overwrite parts of the existing refcount and L1 table, which is not 3905 * an issue because the dirty flag is set, complete data loss is in fact 3906 * desired and partial data loss is consequently fine as well */ 3907 ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size, 3908 (2 + l1_clusters) * s->cluster_size, 0); 3909 /* This call (even if it failed overall) may have overwritten on-disk 3910 * refcount structures; in that case, the in-memory refcount information 3911 * will probably differ from the on-disk information which makes the BDS 3912 * unusable */ 3913 if (ret < 0) { 3914 goto fail_broken_refcounts; 3915 } 3916 3917 BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 3918 BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE); 3919 3920 /* "Create" an empty reftable (one cluster) directly after the image 3921 * header and an empty L1 table three clusters after the image header; 3922 * the cluster between those two will be used as the first refblock */ 3923 l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size); 3924 
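    /* Together with the l1_offset set above, the two assignments below
     * mirror the consecutive l1_table_offset, refcount_table_offset and
     * refcount_table_clusters fields of QCowHeader, so the single
     * bdrv_pwrite_sync() that follows updates all three in one request. */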
l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size); 3925 l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1); 3926 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset), 3927 &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls)); 3928 if (ret < 0) { 3929 goto fail_broken_refcounts; 3930 } 3931 3932 s->l1_table_offset = 3 * s->cluster_size; 3933 3934 new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t)); 3935 if (!new_reftable) { 3936 ret = -ENOMEM; 3937 goto fail_broken_refcounts; 3938 } 3939 3940 s->refcount_table_offset = s->cluster_size; 3941 s->refcount_table_size = s->cluster_size / sizeof(uint64_t); 3942 s->max_refcount_table_index = 0; 3943 3944 g_free(s->refcount_table); 3945 s->refcount_table = new_reftable; 3946 new_reftable = NULL; 3947 3948 /* Now the in-memory refcount information again corresponds to the on-disk 3949 * information (reftable is empty and no refblocks (the refblock cache is 3950 * empty)); however, this means some clusters (e.g. the image header) are 3951 * referenced, but not refcounted, but the normal qcow2 code assumes that 3952 * the in-memory information is always correct */ 3953 3954 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); 3955 3956 /* Enter the first refblock into the reftable */ 3957 rt_entry = cpu_to_be64(2 * s->cluster_size); 3958 ret = bdrv_pwrite_sync(bs->file, s->cluster_size, 3959 &rt_entry, sizeof(rt_entry)); 3960 if (ret < 0) { 3961 goto fail_broken_refcounts; 3962 } 3963 s->refcount_table[0] = 2 * s->cluster_size; 3964 3965 s->free_cluster_index = 0; 3966 assert(3 + l1_clusters <= s->refcount_block_size); 3967 offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2); 3968 if (offset < 0) { 3969 ret = offset; 3970 goto fail_broken_refcounts; 3971 } else if (offset > 0) { 3972 error_report("First cluster in emptied image is in use"); 3973 abort(); 3974 } 3975 3976 /* Now finally the in-memory information corresponds to the on-disk 3977 * structures and is correct */ 3978 ret = qcow2_mark_clean(bs); 3979 if (ret < 0) { 3980 goto fail; 3981 } 3982 3983 ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, 3984 PREALLOC_MODE_OFF, &local_err); 3985 if (ret < 0) { 3986 error_report_err(local_err); 3987 goto fail; 3988 } 3989 3990 return 0; 3991 3992 fail_broken_refcounts: 3993 /* The BDS is unusable at this point. If we wanted to make it usable, we 3994 * would have to call qcow2_refcount_close(), qcow2_refcount_init(), 3995 * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init() 3996 * again. However, because the functions which could have caused this error 3997 * path to be taken are used by those functions as well, it's very likely 3998 * that that sequence will fail as well. Therefore, just eject the BDS. 
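* (Ejecting means setting bs->drv to NULL, as done right below; the block
* layer then refuses further requests on this BDS, typically with
* -ENOMEDIUM, instead of operating on inconsistent refcount data.)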
*/ 3999 bs->drv = NULL; 4000 4001 fail: 4002 g_free(new_reftable); 4003 return ret; 4004 } 4005 4006 static int qcow2_make_empty(BlockDriverState *bs) 4007 { 4008 BDRVQcow2State *s = bs->opaque; 4009 uint64_t offset, end_offset; 4010 int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size); 4011 int l1_clusters, ret = 0; 4012 4013 l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); 4014 4015 if (s->qcow_version >= 3 && !s->snapshots && !s->nb_bitmaps && 4016 3 + l1_clusters <= s->refcount_block_size && 4017 s->crypt_method_header != QCOW_CRYPT_LUKS) { 4018 /* The following function only works for qcow2 v3 images (it 4019 * requires the dirty flag) and only as long as there are no 4020 * features that reserve extra clusters (such as snapshots, 4021 * LUKS header, or persistent bitmaps), because it completely 4022 * empties the image. Furthermore, the L1 table and three 4023 * additional clusters (image header, refcount table, one 4024 * refcount block) have to fit inside one refcount block. */ 4025 return make_completely_empty(bs); 4026 } 4027 4028 /* This fallback code simply discards every active cluster; this is slow, 4029 * but works in all cases */ 4030 end_offset = bs->total_sectors * BDRV_SECTOR_SIZE; 4031 for (offset = 0; offset < end_offset; offset += step) { 4032 /* As this function is generally used after committing an external 4033 * snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the 4034 * default action for this kind of discard is to pass the discard, 4035 * which will ideally result in an actually smaller image file, as 4036 * is probably desired. */ 4037 ret = qcow2_cluster_discard(bs, offset, MIN(step, end_offset - offset), 4038 QCOW2_DISCARD_SNAPSHOT, true); 4039 if (ret < 0) { 4040 break; 4041 } 4042 } 4043 4044 return ret; 4045 } 4046 4047 static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) 4048 { 4049 BDRVQcow2State *s = bs->opaque; 4050 int ret; 4051 4052 qemu_co_mutex_lock(&s->lock); 4053 ret = qcow2_write_caches(bs); 4054 qemu_co_mutex_unlock(&s->lock); 4055 4056 return ret; 4057 } 4058 4059 static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, 4060 Error **errp) 4061 { 4062 Error *local_err = NULL; 4063 BlockMeasureInfo *info; 4064 uint64_t required = 0; /* bytes that contribute to required size */ 4065 uint64_t virtual_size; /* disk size as seen by guest */ 4066 uint64_t refcount_bits; 4067 uint64_t l2_tables; 4068 size_t cluster_size; 4069 int version; 4070 char *optstr; 4071 PreallocMode prealloc; 4072 bool has_backing_file; 4073 4074 /* Parse image creation options */ 4075 cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err); 4076 if (local_err) { 4077 goto err; 4078 } 4079 4080 version = qcow2_opt_get_version_del(opts, &local_err); 4081 if (local_err) { 4082 goto err; 4083 } 4084 4085 refcount_bits = qcow2_opt_get_refcount_bits_del(opts, version, &local_err); 4086 if (local_err) { 4087 goto err; 4088 } 4089 4090 optstr = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); 4091 prealloc = qapi_enum_parse(&PreallocMode_lookup, optstr, 4092 PREALLOC_MODE_OFF, &local_err); 4093 g_free(optstr); 4094 if (local_err) { 4095 goto err; 4096 } 4097 4098 optstr = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE); 4099 has_backing_file = !!optstr; 4100 g_free(optstr); 4101 4102 virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); 4103 virtual_size = ROUND_UP(virtual_size, cluster_size); 4104 4105 /* Check that virtual disk size is valid */ 4106 l2_tables = DIV_ROUND_UP(virtual_size / 
cluster_size, 4107 cluster_size / sizeof(uint64_t)); 4108 if (l2_tables * sizeof(uint64_t) > QCOW_MAX_L1_SIZE) { 4109 error_setg(&local_err, "The image size is too large " 4110 "(try using a larger cluster size)"); 4111 goto err; 4112 } 4113 4114 /* Account for input image */ 4115 if (in_bs) { 4116 int64_t ssize = bdrv_getlength(in_bs); 4117 if (ssize < 0) { 4118 error_setg_errno(&local_err, -ssize, 4119 "Unable to get image virtual_size"); 4120 goto err; 4121 } 4122 4123 virtual_size = ROUND_UP(ssize, cluster_size); 4124 4125 if (has_backing_file) { 4126 /* We don't know how much of the backing chain is shared by the input 4127 * image and the new image file. In the worst case the new image's 4128 * backing file has nothing in common with the input image. Be 4129 * conservative and assume all clusters need to be written. 4130 */ 4131 required = virtual_size; 4132 } else { 4133 int64_t offset; 4134 int64_t pnum = 0; 4135 4136 for (offset = 0; offset < ssize; offset += pnum) { 4137 int ret; 4138 4139 ret = bdrv_block_status_above(in_bs, NULL, offset, 4140 ssize - offset, &pnum, NULL, 4141 NULL); 4142 if (ret < 0) { 4143 error_setg_errno(&local_err, -ret, 4144 "Unable to get block status"); 4145 goto err; 4146 } 4147 4148 if (ret & BDRV_BLOCK_ZERO) { 4149 /* Skip zero regions (safe with no backing file) */ 4150 } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) == 4151 (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) { 4152 /* Extend pnum to end of cluster for next iteration */ 4153 pnum = ROUND_UP(offset + pnum, cluster_size) - offset; 4154 4155 /* Count clusters we've seen */ 4156 required += offset % cluster_size + pnum; 4157 } 4158 } 4159 } 4160 } 4161 4162 /* Take into account preallocation. Nothing special is needed for 4163 * PREALLOC_MODE_METADATA since metadata is always counted. 4164 */ 4165 if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { 4166 required = virtual_size; 4167 } 4168 4169 info = g_new(BlockMeasureInfo, 1); 4170 info->fully_allocated = 4171 qcow2_calc_prealloc_size(virtual_size, cluster_size, 4172 ctz32(refcount_bits)); 4173 4174 /* Remove data clusters that are not required. This overestimates the 4175 * required size because metadata needed for the fully allocated file is 4176 * still counted.
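*
* Illustrative example (figures are made up, assuming 64 KiB clusters and
* 16-bit refcounts): for a 10 GiB virtual disk, fully_allocated comes to
* slightly more than 10 GiB, say about 10.002 GiB, once the header, L1/L2
* tables, refcount table and refcount blocks are added. If only 1 GiB of
* data clusters actually needs to be written, the estimate below works out
* to about 10.002 GiB - 10 GiB + 1 GiB = 1.002 GiB, i.e. the data to be
* copied plus the metadata overhead of a fully allocated image.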
4177 */ 4178 info->required = info->fully_allocated - virtual_size + required; 4179 return info; 4180 4181 err: 4182 error_propagate(errp, local_err); 4183 return NULL; 4184 } 4185 4186 static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 4187 { 4188 BDRVQcow2State *s = bs->opaque; 4189 bdi->unallocated_blocks_are_zero = true; 4190 bdi->cluster_size = s->cluster_size; 4191 bdi->vm_state_offset = qcow2_vm_state_offset(s); 4192 return 0; 4193 } 4194 4195 static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) 4196 { 4197 BDRVQcow2State *s = bs->opaque; 4198 ImageInfoSpecific *spec_info; 4199 QCryptoBlockInfo *encrypt_info = NULL; 4200 4201 if (s->crypto != NULL) { 4202 encrypt_info = qcrypto_block_get_info(s->crypto, &error_abort); 4203 } 4204 4205 spec_info = g_new(ImageInfoSpecific, 1); 4206 *spec_info = (ImageInfoSpecific){ 4207 .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, 4208 .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1), 4209 }; 4210 if (s->qcow_version == 2) { 4211 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ 4212 .compat = g_strdup("0.10"), 4213 .refcount_bits = s->refcount_bits, 4214 }; 4215 } else if (s->qcow_version == 3) { 4216 *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ 4217 .compat = g_strdup("1.1"), 4218 .lazy_refcounts = s->compatible_features & 4219 QCOW2_COMPAT_LAZY_REFCOUNTS, 4220 .has_lazy_refcounts = true, 4221 .corrupt = s->incompatible_features & 4222 QCOW2_INCOMPAT_CORRUPT, 4223 .has_corrupt = true, 4224 .refcount_bits = s->refcount_bits, 4225 }; 4226 } else { 4227 /* if this assertion fails, this probably means a new version was 4228 * added without having it covered here */ 4229 assert(false); 4230 } 4231 4232 if (encrypt_info) { 4233 ImageInfoSpecificQCow2Encryption *qencrypt = 4234 g_new(ImageInfoSpecificQCow2Encryption, 1); 4235 switch (encrypt_info->format) { 4236 case Q_CRYPTO_BLOCK_FORMAT_QCOW: 4237 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_AES; 4238 break; 4239 case Q_CRYPTO_BLOCK_FORMAT_LUKS: 4240 qencrypt->format = BLOCKDEV_QCOW2_ENCRYPTION_FORMAT_LUKS; 4241 qencrypt->u.luks = encrypt_info->u.luks; 4242 break; 4243 default: 4244 abort(); 4245 } 4246 /* Since we did shallow copy above, erase any pointers 4247 * in the original info */ 4248 memset(&encrypt_info->u, 0, sizeof(encrypt_info->u)); 4249 qapi_free_QCryptoBlockInfo(encrypt_info); 4250 4251 spec_info->u.qcow2.data->has_encrypt = true; 4252 spec_info->u.qcow2.data->encrypt = qencrypt; 4253 } 4254 4255 return spec_info; 4256 } 4257 4258 static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 4259 int64_t pos) 4260 { 4261 BDRVQcow2State *s = bs->opaque; 4262 4263 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); 4264 return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos, 4265 qiov->size, qiov, 0); 4266 } 4267 4268 static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 4269 int64_t pos) 4270 { 4271 BDRVQcow2State *s = bs->opaque; 4272 4273 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); 4274 return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos, 4275 qiov->size, qiov, 0); 4276 } 4277 4278 /* 4279 * Downgrades an image's version. To achieve this, any incompatible features 4280 * have to be removed. 
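* For example, this is the path behind 'qemu-img amend -o compat=0.10':
* that option maps to target_version == 2 below, which in turn requires
* 16-bit refcounts and rewrites v3-only zero clusters as ordinary data
* clusters first.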
4281 */ 4282 static int qcow2_downgrade(BlockDriverState *bs, int target_version, 4283 BlockDriverAmendStatusCB *status_cb, void *cb_opaque, 4284 Error **errp) 4285 { 4286 BDRVQcow2State *s = bs->opaque; 4287 int current_version = s->qcow_version; 4288 int ret; 4289 4290 /* This is qcow2_downgrade(), not qcow2_upgrade() */ 4291 assert(target_version < current_version); 4292 4293 /* There are no other versions (now) that you can downgrade to */ 4294 assert(target_version == 2); 4295 4296 if (s->refcount_order != 4) { 4297 error_setg(errp, "compat=0.10 requires refcount_bits=16"); 4298 return -ENOTSUP; 4299 } 4300 4301 /* clear incompatible features */ 4302 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 4303 ret = qcow2_mark_clean(bs); 4304 if (ret < 0) { 4305 error_setg_errno(errp, -ret, "Failed to make the image clean"); 4306 return ret; 4307 } 4308 } 4309 4310 /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in 4311 * the first place; if that happens nonetheless, returning -ENOTSUP is the 4312 * best thing to do anyway */ 4313 4314 if (s->incompatible_features) { 4315 error_setg(errp, "Cannot downgrade an image with incompatible features " 4316 "%#" PRIx64 " set", s->incompatible_features); 4317 return -ENOTSUP; 4318 } 4319 4320 /* since we can ignore compatible features, we can set them to 0 as well */ 4321 s->compatible_features = 0; 4322 /* if lazy refcounts have been used, they have already been fixed through 4323 * clearing the dirty flag */ 4324 4325 /* clearing autoclear features is trivial */ 4326 s->autoclear_features = 0; 4327 4328 ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque); 4329 if (ret < 0) { 4330 error_setg_errno(errp, -ret, "Failed to turn zero into data clusters"); 4331 return ret; 4332 } 4333 4334 s->qcow_version = target_version; 4335 ret = qcow2_update_header(bs); 4336 if (ret < 0) { 4337 s->qcow_version = current_version; 4338 error_setg_errno(errp, -ret, "Failed to update the image header"); 4339 return ret; 4340 } 4341 return 0; 4342 } 4343 4344 typedef enum Qcow2AmendOperation { 4345 /* This is the value Qcow2AmendHelperCBInfo::last_operation will be 4346 * statically initialized to so that the helper CB can discern the first 4347 * invocation from an operation change */ 4348 QCOW2_NO_OPERATION = 0, 4349 4350 QCOW2_CHANGING_REFCOUNT_ORDER, 4351 QCOW2_DOWNGRADING, 4352 } Qcow2AmendOperation; 4353 4354 typedef struct Qcow2AmendHelperCBInfo { 4355 /* The code coordinating the amend operations should only modify 4356 * these four fields; the rest will be managed by the CB */ 4357 BlockDriverAmendStatusCB *original_status_cb; 4358 void *original_cb_opaque; 4359 4360 Qcow2AmendOperation current_operation; 4361 4362 /* Total number of operations to perform (only set once) */ 4363 int total_operations; 4364 4365 /* The following fields are managed by the CB */ 4366 4367 /* Number of operations completed */ 4368 int operations_completed; 4369 4370 /* Cumulative offset of all completed operations */ 4371 int64_t offset_completed; 4372 4373 Qcow2AmendOperation last_operation; 4374 int64_t last_work_size; 4375 } Qcow2AmendHelperCBInfo; 4376 4377 static void qcow2_amend_helper_cb(BlockDriverState *bs, 4378 int64_t operation_offset, 4379 int64_t operation_work_size, void *opaque) 4380 { 4381 Qcow2AmendHelperCBInfo *info = opaque; 4382 int64_t current_work_size; 4383 int64_t projected_work_size; 4384 4385 if (info->current_operation != info->last_operation) { 4386 if (info->last_operation != QCOW2_NO_OPERATION) { 4387 
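/* An earlier operation has just finished; fold its full work size into
 * the completed offset before accounting for the new one. */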
info->offset_completed += info->last_work_size; 4388 info->operations_completed++; 4389 } 4390 4391 info->last_operation = info->current_operation; 4392 } 4393 4394 assert(info->total_operations > 0); 4395 assert(info->operations_completed < info->total_operations); 4396 4397 info->last_work_size = operation_work_size; 4398 4399 current_work_size = info->offset_completed + operation_work_size; 4400 4401 /* current_work_size is the total work size for (operations_completed + 1) 4402 * operations (which includes this one), so multiply it by the number of 4403 * operations not covered and divide it by the number of operations 4404 * covered to get a projection for the operations not covered */ 4405 projected_work_size = current_work_size * (info->total_operations - 4406 info->operations_completed - 1) 4407 / (info->operations_completed + 1); 4408 4409 info->original_status_cb(bs, info->offset_completed + operation_offset, 4410 current_work_size + projected_work_size, 4411 info->original_cb_opaque); 4412 } 4413 4414 static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, 4415 BlockDriverAmendStatusCB *status_cb, 4416 void *cb_opaque, 4417 Error **errp) 4418 { 4419 BDRVQcow2State *s = bs->opaque; 4420 int old_version = s->qcow_version, new_version = old_version; 4421 uint64_t new_size = 0; 4422 const char *backing_file = NULL, *backing_format = NULL; 4423 bool lazy_refcounts = s->use_lazy_refcounts; 4424 const char *compat = NULL; 4425 uint64_t cluster_size = s->cluster_size; 4426 bool encrypt; 4427 int encformat; 4428 int refcount_bits = s->refcount_bits; 4429 int ret; 4430 QemuOptDesc *desc = opts->list->desc; 4431 Qcow2AmendHelperCBInfo helper_cb_info; 4432 4433 while (desc && desc->name) { 4434 if (!qemu_opt_find(opts, desc->name)) { 4435 /* only change explicitly defined options */ 4436 desc++; 4437 continue; 4438 } 4439 4440 if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) { 4441 compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL); 4442 if (!compat) { 4443 /* preserve default */ 4444 } else if (!strcmp(compat, "0.10")) { 4445 new_version = 2; 4446 } else if (!strcmp(compat, "1.1")) { 4447 new_version = 3; 4448 } else { 4449 error_setg(errp, "Unknown compatibility level %s", compat); 4450 return -EINVAL; 4451 } 4452 } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) { 4453 error_setg(errp, "Cannot change preallocation mode"); 4454 return -ENOTSUP; 4455 } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { 4456 new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); 4457 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) { 4458 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); 4459 } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) { 4460 backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); 4461 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) { 4462 encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, 4463 !!s->crypto); 4464 4465 if (encrypt != !!s->crypto) { 4466 error_setg(errp, 4467 "Changing the encryption flag is not supported"); 4468 return -ENOTSUP; 4469 } 4470 } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT_FORMAT)) { 4471 encformat = qcow2_crypt_method_from_format( 4472 qemu_opt_get(opts, BLOCK_OPT_ENCRYPT_FORMAT)); 4473 4474 if (encformat != s->crypt_method_header) { 4475 error_setg(errp, 4476 "Changing the encryption format is not supported"); 4477 return -ENOTSUP; 4478 } 4479 } else if (g_str_has_prefix(desc->name, "encrypt.")) { 4480 error_setg(errp, 4481 "Changing the encryption parameters is not supported"); 4482 return -ENOTSUP; 4483 } 
else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { 4484 cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 4485 cluster_size); 4486 if (cluster_size != s->cluster_size) { 4487 error_setg(errp, "Changing the cluster size is not supported"); 4488 return -ENOTSUP; 4489 } 4490 } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { 4491 lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, 4492 lazy_refcounts); 4493 } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { 4494 refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS, 4495 refcount_bits); 4496 4497 if (refcount_bits <= 0 || refcount_bits > 64 || 4498 !is_power_of_2(refcount_bits)) 4499 { 4500 error_setg(errp, "Refcount width must be a power of two and " 4501 "may not exceed 64 bits"); 4502 return -EINVAL; 4503 } 4504 } else { 4505 /* if this point is reached, this probably means a new option was 4506 * added without having it covered here */ 4507 abort(); 4508 } 4509 4510 desc++; 4511 } 4512 4513 helper_cb_info = (Qcow2AmendHelperCBInfo){ 4514 .original_status_cb = status_cb, 4515 .original_cb_opaque = cb_opaque, 4516 .total_operations = (new_version < old_version) 4517 + (s->refcount_bits != refcount_bits) 4518 }; 4519 4520 /* Upgrade first (some features may require compat=1.1) */ 4521 if (new_version > old_version) { 4522 s->qcow_version = new_version; 4523 ret = qcow2_update_header(bs); 4524 if (ret < 0) { 4525 s->qcow_version = old_version; 4526 error_setg_errno(errp, -ret, "Failed to update the image header"); 4527 return ret; 4528 } 4529 } 4530 4531 if (s->refcount_bits != refcount_bits) { 4532 int refcount_order = ctz32(refcount_bits); 4533 4534 if (new_version < 3 && refcount_bits != 16) { 4535 error_setg(errp, "Refcount widths other than 16 bits require " 4536 "compatibility level 1.1 or above (use compat=1.1 or " 4537 "greater)"); 4538 return -EINVAL; 4539 } 4540 4541 helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER; 4542 ret = qcow2_change_refcount_order(bs, refcount_order, 4543 &qcow2_amend_helper_cb, 4544 &helper_cb_info, errp); 4545 if (ret < 0) { 4546 return ret; 4547 } 4548 } 4549 4550 if (backing_file || backing_format) { 4551 ret = qcow2_change_backing_file(bs, 4552 backing_file ?: s->image_backing_file, 4553 backing_format ?: s->image_backing_format); 4554 if (ret < 0) { 4555 error_setg_errno(errp, -ret, "Failed to change the backing file"); 4556 return ret; 4557 } 4558 } 4559 4560 if (s->use_lazy_refcounts != lazy_refcounts) { 4561 if (lazy_refcounts) { 4562 if (new_version < 3) { 4563 error_setg(errp, "Lazy refcounts only supported with " 4564 "compatibility level 1.1 and above (use compat=1.1 " 4565 "or greater)"); 4566 return -EINVAL; 4567 } 4568 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 4569 ret = qcow2_update_header(bs); 4570 if (ret < 0) { 4571 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 4572 error_setg_errno(errp, -ret, "Failed to update the image header"); 4573 return ret; 4574 } 4575 s->use_lazy_refcounts = true; 4576 } else { 4577 /* make image clean first */ 4578 ret = qcow2_mark_clean(bs); 4579 if (ret < 0) { 4580 error_setg_errno(errp, -ret, "Failed to make the image clean"); 4581 return ret; 4582 } 4583 /* now disallow lazy refcounts */ 4584 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 4585 ret = qcow2_update_header(bs); 4586 if (ret < 0) { 4587 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 4588 error_setg_errno(errp, -ret, "Failed to update the image header"); 4589 return ret; 4590 } 4591 
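/* Only drop the in-memory flag once the on-disk header no longer
 * advertises lazy refcounts */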
s->use_lazy_refcounts = false; 4592 } 4593 } 4594 4595 if (new_size) { 4596 BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); 4597 ret = blk_insert_bs(blk, bs, errp); 4598 if (ret < 0) { 4599 blk_unref(blk); 4600 return ret; 4601 } 4602 4603 ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, errp); 4604 blk_unref(blk); 4605 if (ret < 0) { 4606 return ret; 4607 } 4608 } 4609 4610 /* Downgrade last (so unsupported features can be removed before) */ 4611 if (new_version < old_version) { 4612 helper_cb_info.current_operation = QCOW2_DOWNGRADING; 4613 ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb, 4614 &helper_cb_info, errp); 4615 if (ret < 0) { 4616 return ret; 4617 } 4618 } 4619 4620 return 0; 4621 } 4622 4623 /* 4624 * If offset or size are negative, respectively, they will not be included in 4625 * the BLOCK_IMAGE_CORRUPTED event emitted. 4626 * fatal will be ignored for read-only BDS; corruptions found there will always 4627 * be considered non-fatal. 4628 */ 4629 void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, 4630 int64_t size, const char *message_format, ...) 4631 { 4632 BDRVQcow2State *s = bs->opaque; 4633 const char *node_name; 4634 char *message; 4635 va_list ap; 4636 4637 fatal = fatal && bdrv_is_writable(bs); 4638 4639 if (s->signaled_corruption && 4640 (!fatal || (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT))) 4641 { 4642 return; 4643 } 4644 4645 va_start(ap, message_format); 4646 message = g_strdup_vprintf(message_format, ap); 4647 va_end(ap); 4648 4649 if (fatal) { 4650 fprintf(stderr, "qcow2: Marking image as corrupt: %s; further " 4651 "corruption events will be suppressed\n", message); 4652 } else { 4653 fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal " 4654 "corruption events will be suppressed\n", message); 4655 } 4656 4657 node_name = bdrv_get_node_name(bs); 4658 qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), 4659 *node_name != '\0', node_name, 4660 message, offset >= 0, offset, 4661 size >= 0, size, 4662 fatal, &error_abort); 4663 g_free(message); 4664 4665 if (fatal) { 4666 qcow2_mark_corrupt(bs); 4667 bs->drv = NULL; /* make BDS unusable */ 4668 } 4669 4670 s->signaled_corruption = true; 4671 } 4672 4673 static QemuOptsList qcow2_create_opts = { 4674 .name = "qcow2-create-opts", 4675 .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head), 4676 .desc = { 4677 { 4678 .name = BLOCK_OPT_SIZE, 4679 .type = QEMU_OPT_SIZE, 4680 .help = "Virtual disk size" 4681 }, 4682 { 4683 .name = BLOCK_OPT_COMPAT_LEVEL, 4684 .type = QEMU_OPT_STRING, 4685 .help = "Compatibility level (0.10 or 1.1)" 4686 }, 4687 { 4688 .name = BLOCK_OPT_BACKING_FILE, 4689 .type = QEMU_OPT_STRING, 4690 .help = "File name of a base image" 4691 }, 4692 { 4693 .name = BLOCK_OPT_BACKING_FMT, 4694 .type = QEMU_OPT_STRING, 4695 .help = "Image format of the base image" 4696 }, 4697 { 4698 .name = BLOCK_OPT_ENCRYPT, 4699 .type = QEMU_OPT_BOOL, 4700 .help = "Encrypt the image with format 'aes'. 
(Deprecated " 4701 "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", 4702 }, 4703 { 4704 .name = BLOCK_OPT_ENCRYPT_FORMAT, 4705 .type = QEMU_OPT_STRING, 4706 .help = "Encrypt the image, format choices: 'aes', 'luks'", 4707 }, 4708 BLOCK_CRYPTO_OPT_DEF_KEY_SECRET("encrypt.", 4709 "ID of secret providing qcow AES key or LUKS passphrase"), 4710 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG("encrypt."), 4711 BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE("encrypt."), 4712 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG("encrypt."), 4713 BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG("encrypt."), 4714 BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG("encrypt."), 4715 BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME("encrypt."), 4716 { 4717 .name = BLOCK_OPT_CLUSTER_SIZE, 4718 .type = QEMU_OPT_SIZE, 4719 .help = "qcow2 cluster size", 4720 .def_value_str = stringify(DEFAULT_CLUSTER_SIZE) 4721 }, 4722 { 4723 .name = BLOCK_OPT_PREALLOC, 4724 .type = QEMU_OPT_STRING, 4725 .help = "Preallocation mode (allowed values: off, metadata, " 4726 "falloc, full)" 4727 }, 4728 { 4729 .name = BLOCK_OPT_LAZY_REFCOUNTS, 4730 .type = QEMU_OPT_BOOL, 4731 .help = "Postpone refcount updates", 4732 .def_value_str = "off" 4733 }, 4734 { 4735 .name = BLOCK_OPT_REFCOUNT_BITS, 4736 .type = QEMU_OPT_NUMBER, 4737 .help = "Width of a reference count entry in bits", 4738 .def_value_str = "16" 4739 }, 4740 { /* end of list */ } 4741 } 4742 }; 4743 4744 BlockDriver bdrv_qcow2 = { 4745 .format_name = "qcow2", 4746 .instance_size = sizeof(BDRVQcow2State), 4747 .bdrv_probe = qcow2_probe, 4748 .bdrv_open = qcow2_open, 4749 .bdrv_close = qcow2_close, 4750 .bdrv_reopen_prepare = qcow2_reopen_prepare, 4751 .bdrv_reopen_commit = qcow2_reopen_commit, 4752 .bdrv_reopen_abort = qcow2_reopen_abort, 4753 .bdrv_join_options = qcow2_join_options, 4754 .bdrv_child_perm = bdrv_format_default_perms, 4755 .bdrv_co_create_opts = qcow2_co_create_opts, 4756 .bdrv_co_create = qcow2_co_create, 4757 .bdrv_has_zero_init = bdrv_has_zero_init_1, 4758 .bdrv_co_block_status = qcow2_co_block_status, 4759 4760 .bdrv_co_preadv = qcow2_co_preadv, 4761 .bdrv_co_pwritev = qcow2_co_pwritev, 4762 .bdrv_co_flush_to_os = qcow2_co_flush_to_os, 4763 4764 .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, 4765 .bdrv_co_pdiscard = qcow2_co_pdiscard, 4766 .bdrv_co_copy_range_from = qcow2_co_copy_range_from, 4767 .bdrv_co_copy_range_to = qcow2_co_copy_range_to, 4768 .bdrv_co_truncate = qcow2_co_truncate, 4769 .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed, 4770 .bdrv_make_empty = qcow2_make_empty, 4771 4772 .bdrv_snapshot_create = qcow2_snapshot_create, 4773 .bdrv_snapshot_goto = qcow2_snapshot_goto, 4774 .bdrv_snapshot_delete = qcow2_snapshot_delete, 4775 .bdrv_snapshot_list = qcow2_snapshot_list, 4776 .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, 4777 .bdrv_measure = qcow2_measure, 4778 .bdrv_get_info = qcow2_get_info, 4779 .bdrv_get_specific_info = qcow2_get_specific_info, 4780 4781 .bdrv_save_vmstate = qcow2_save_vmstate, 4782 .bdrv_load_vmstate = qcow2_load_vmstate, 4783 4784 .supports_backing = true, 4785 .bdrv_change_backing_file = qcow2_change_backing_file, 4786 4787 .bdrv_refresh_limits = qcow2_refresh_limits, 4788 .bdrv_co_invalidate_cache = qcow2_co_invalidate_cache, 4789 .bdrv_inactivate = qcow2_inactivate, 4790 4791 .create_opts = &qcow2_create_opts, 4792 .bdrv_co_check = qcow2_co_check, 4793 .bdrv_amend_options = qcow2_amend_options, 4794 4795 .bdrv_detach_aio_context = qcow2_detach_aio_context, 4796 .bdrv_attach_aio_context = qcow2_attach_aio_context, 4797 4798 .bdrv_reopen_bitmaps_rw = 
qcow2_reopen_bitmaps_rw, 4799 .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap, 4800 .bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap, 4801 }; 4802 4803 static void bdrv_qcow2_init(void) 4804 { 4805 bdrv_register(&bdrv_qcow2); 4806 } 4807 4808 block_init(bdrv_qcow2_init); 4809
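/*
 * Illustrative usage sketch (not part of the driver): the entries in
 * qcow2_create_opts above are the -o keys that qemu-img accepts for this
 * format, so a typical image creation might look like
 *
 *   qemu-img create -f qcow2 \
 *       -o compat=1.1,cluster_size=65536,lazy_refcounts=on,refcount_bits=16 \
 *       disk.qcow2 10G
 *
 * while 'qemu-img amend -o compat=0.10 disk.qcow2' exercises the
 * qcow2_amend_options()/qcow2_downgrade() path implemented above.
 */

#if 0
/* Minimal sketch, not compiled: once block_init() has run the constructor
 * above, generic block-layer code can resolve this driver by its format
 * name rather than referencing the bdrv_qcow2 symbol directly. */
static BlockDriver *example_lookup_qcow2(void)
{
    return bdrv_find_format("qcow2");   /* expected to return &bdrv_qcow2 */
}
#endif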