1 /* 2 * Block driver for the QCOW version 2 format 3 * 4 * Copyright (c) 2004-2006 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include "qemu-common.h" 25 #include "block/block_int.h" 26 #include "qemu/module.h" 27 #include <zlib.h> 28 #include "qemu/aes.h" 29 #include "block/qcow2.h" 30 #include "qemu/error-report.h" 31 #include "qapi/qmp/qerror.h" 32 #include "qapi/qmp/qbool.h" 33 #include "trace.h" 34 35 /* 36 Differences with QCOW: 37 38 - Support for multiple incremental snapshots. 39 - Memory management by reference counts. 40 - Clusters which have a reference count of one have the bit 41 QCOW_OFLAG_COPIED to optimize write performance. 42 - Size of compressed clusters is stored in sectors to reduce bit usage 43 in the cluster offsets. 44 - Support for storing additional data (such as the VM state) in the 45 snapshots. 46 - If a backing store is used, the cluster size is not constrained 47 (could be backported to QCOW). 48 - L2 tables have always a size of one cluster. 49 */ 50 51 52 typedef struct { 53 uint32_t magic; 54 uint32_t len; 55 } QEMU_PACKED QCowExtension; 56 57 #define QCOW2_EXT_MAGIC_END 0 58 #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA 59 #define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 60 61 static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) 62 { 63 const QCowHeader *cow_header = (const void *)buf; 64 65 if (buf_size >= sizeof(QCowHeader) && 66 be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 67 be32_to_cpu(cow_header->version) >= 2) 68 return 100; 69 else 70 return 0; 71 } 72 73 74 /* 75 * read qcow2 extension and fill bs 76 * start reading from start_offset 77 * finish reading upon magic of value 0 or when end_offset reached 78 * unknown magic is skipped (future extension this version knows nothing about) 79 * return 0 upon success, non-0 otherwise 80 */ 81 static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, 82 uint64_t end_offset, void **p_feature_table, 83 Error **errp) 84 { 85 BDRVQcowState *s = bs->opaque; 86 QCowExtension ext; 87 uint64_t offset; 88 int ret; 89 90 #ifdef DEBUG_EXT 91 printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); 92 #endif 93 offset = start_offset; 94 while (offset < end_offset) { 95 96 #ifdef DEBUG_EXT 97 /* Sanity check */ 98 if (offset > s->cluster_size) 99 printf("qcow2_read_extension: suspicious offset %lu\n", offset); 100 101 printf("attempting to read extended header in offset %lu\n", offset); 102 #endif 103 104 ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext)); 105 if (ret < 0) { 106 error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: " 107 "pread fail from offset %" PRIu64, offset); 108 return 1; 109 } 110 be32_to_cpus(&ext.magic); 111 be32_to_cpus(&ext.len); 112 offset += sizeof(ext); 113 #ifdef DEBUG_EXT 114 printf("ext.magic = 0x%x\n", ext.magic); 115 #endif 116 if (ext.len > end_offset - offset) { 117 error_setg(errp, "Header extension too large"); 118 return -EINVAL; 119 } 120 121 switch (ext.magic) { 122 case QCOW2_EXT_MAGIC_END: 123 return 0; 124 125 case QCOW2_EXT_MAGIC_BACKING_FORMAT: 126 if (ext.len >= sizeof(bs->backing_format)) { 127 error_setg(errp, "ERROR: ext_backing_format: len=%u too large" 128 " (>=%zu)", ext.len, sizeof(bs->backing_format)); 129 return 2; 130 } 131 ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len); 132 if (ret < 0) { 133 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: " 134 "Could not read format name"); 135 return 3; 136 } 137 bs->backing_format[ext.len] = '\0'; 138 #ifdef DEBUG_EXT 139 printf("Qcow2: Got format extension %s\n", bs->backing_format); 140 #endif 141 break; 142 143 case QCOW2_EXT_MAGIC_FEATURE_TABLE: 144 if (p_feature_table != NULL) { 145 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); 146 ret = bdrv_pread(bs->file, offset , feature_table, ext.len); 147 if (ret < 0) { 148 error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " 149 "Could not read table"); 150 return ret; 151 } 152 153 *p_feature_table = feature_table; 154 } 155 break; 156 157 default: 158 /* unknown magic - save it in case we need to rewrite the header */ 159 { 160 Qcow2UnknownHeaderExtension *uext; 161 162 uext = g_malloc0(sizeof(*uext) + ext.len); 163 uext->magic = ext.magic; 164 uext->len = ext.len; 165 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); 166 167 ret = bdrv_pread(bs->file, offset , uext->data, uext->len); 168 if (ret < 0) { 169 error_setg_errno(errp, -ret, "ERROR: unknown extension: " 170 "Could not read data"); 171 return ret; 172 } 173 } 174 break; 175 } 176 177 offset += ((ext.len + 7) & ~7); 178 } 179 180 return 0; 181 } 182 183 static void cleanup_unknown_header_ext(BlockDriverState *bs) 184 { 185 BDRVQcowState *s = bs->opaque; 186 Qcow2UnknownHeaderExtension *uext, *next; 187 188 QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { 189 QLIST_REMOVE(uext, next); 190 g_free(uext); 191 } 192 } 193 194 static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs, 195 Error **errp, const char *fmt, ...) 196 { 197 char msg[64]; 198 va_list ap; 199 200 va_start(ap, fmt); 201 vsnprintf(msg, sizeof(msg), fmt, ap); 202 va_end(ap); 203 204 error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, bs->device_name, "qcow2", 205 msg); 206 } 207 208 static void report_unsupported_feature(BlockDriverState *bs, 209 Error **errp, Qcow2Feature *table, uint64_t mask) 210 { 211 while (table && table->name[0] != '\0') { 212 if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) { 213 if (mask & (1 << table->bit)) { 214 report_unsupported(bs, errp, "%.46s", table->name); 215 mask &= ~(1 << table->bit); 216 } 217 } 218 table++; 219 } 220 221 if (mask) { 222 report_unsupported(bs, errp, "Unknown incompatible feature: %" PRIx64, 223 mask); 224 } 225 } 226 227 /* 228 * Sets the dirty bit and flushes afterwards if necessary. 229 * 230 * The incompatible_features bit is only set if the image file header was 231 * updated successfully. Therefore it is not required to check the return 232 * value of this function. 233 */ 234 int qcow2_mark_dirty(BlockDriverState *bs) 235 { 236 BDRVQcowState *s = bs->opaque; 237 uint64_t val; 238 int ret; 239 240 assert(s->qcow_version >= 3); 241 242 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 243 return 0; /* already dirty */ 244 } 245 246 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY); 247 ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features), 248 &val, sizeof(val)); 249 if (ret < 0) { 250 return ret; 251 } 252 ret = bdrv_flush(bs->file); 253 if (ret < 0) { 254 return ret; 255 } 256 257 /* Only treat image as dirty if the header was updated successfully */ 258 s->incompatible_features |= QCOW2_INCOMPAT_DIRTY; 259 return 0; 260 } 261 262 /* 263 * Clears the dirty bit and flushes before if necessary. Only call this 264 * function when there are no pending requests, it does not guard against 265 * concurrent requests dirtying the image. 266 */ 267 static int qcow2_mark_clean(BlockDriverState *bs) 268 { 269 BDRVQcowState *s = bs->opaque; 270 271 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 272 int ret; 273 274 s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY; 275 276 ret = bdrv_flush(bs); 277 if (ret < 0) { 278 return ret; 279 } 280 281 return qcow2_update_header(bs); 282 } 283 return 0; 284 } 285 286 /* 287 * Marks the image as corrupt. 288 */ 289 int qcow2_mark_corrupt(BlockDriverState *bs) 290 { 291 BDRVQcowState *s = bs->opaque; 292 293 s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT; 294 return qcow2_update_header(bs); 295 } 296 297 /* 298 * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes 299 * before if necessary. 300 */ 301 int qcow2_mark_consistent(BlockDriverState *bs) 302 { 303 BDRVQcowState *s = bs->opaque; 304 305 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 306 int ret = bdrv_flush(bs); 307 if (ret < 0) { 308 return ret; 309 } 310 311 s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT; 312 return qcow2_update_header(bs); 313 } 314 return 0; 315 } 316 317 static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result, 318 BdrvCheckMode fix) 319 { 320 int ret = qcow2_check_refcounts(bs, result, fix); 321 if (ret < 0) { 322 return ret; 323 } 324 325 if (fix && result->check_errors == 0 && result->corruptions == 0) { 326 ret = qcow2_mark_clean(bs); 327 if (ret < 0) { 328 return ret; 329 } 330 return qcow2_mark_consistent(bs); 331 } 332 return ret; 333 } 334 335 static int validate_table_offset(BlockDriverState *bs, uint64_t offset, 336 uint64_t entries, size_t entry_len) 337 { 338 BDRVQcowState *s = bs->opaque; 339 uint64_t size; 340 341 /* Use signed INT64_MAX as the maximum even for uint64_t header fields, 342 * because values will be passed to qemu functions taking int64_t. */ 343 if (entries > INT64_MAX / entry_len) { 344 return -EINVAL; 345 } 346 347 size = entries * entry_len; 348 349 if (INT64_MAX - size < offset) { 350 return -EINVAL; 351 } 352 353 /* Tables must be cluster aligned */ 354 if (offset & (s->cluster_size - 1)) { 355 return -EINVAL; 356 } 357 358 return 0; 359 } 360 361 static QemuOptsList qcow2_runtime_opts = { 362 .name = "qcow2", 363 .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head), 364 .desc = { 365 { 366 .name = QCOW2_OPT_LAZY_REFCOUNTS, 367 .type = QEMU_OPT_BOOL, 368 .help = "Postpone refcount updates", 369 }, 370 { 371 .name = QCOW2_OPT_DISCARD_REQUEST, 372 .type = QEMU_OPT_BOOL, 373 .help = "Pass guest discard requests to the layer below", 374 }, 375 { 376 .name = QCOW2_OPT_DISCARD_SNAPSHOT, 377 .type = QEMU_OPT_BOOL, 378 .help = "Generate discard requests when snapshot related space " 379 "is freed", 380 }, 381 { 382 .name = QCOW2_OPT_DISCARD_OTHER, 383 .type = QEMU_OPT_BOOL, 384 .help = "Generate discard requests when other clusters are freed", 385 }, 386 { 387 .name = QCOW2_OPT_OVERLAP, 388 .type = QEMU_OPT_STRING, 389 .help = "Selects which overlap checks to perform from a range of " 390 "templates (none, constant, cached, all)", 391 }, 392 { 393 .name = QCOW2_OPT_OVERLAP_MAIN_HEADER, 394 .type = QEMU_OPT_BOOL, 395 .help = "Check for unintended writes into the main qcow2 header", 396 }, 397 { 398 .name = QCOW2_OPT_OVERLAP_ACTIVE_L1, 399 .type = QEMU_OPT_BOOL, 400 .help = "Check for unintended writes into the active L1 table", 401 }, 402 { 403 .name = QCOW2_OPT_OVERLAP_ACTIVE_L2, 404 .type = QEMU_OPT_BOOL, 405 .help = "Check for unintended writes into an active L2 table", 406 }, 407 { 408 .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 409 .type = QEMU_OPT_BOOL, 410 .help = "Check for unintended writes into the refcount table", 411 }, 412 { 413 .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, 414 .type = QEMU_OPT_BOOL, 415 .help = "Check for unintended writes into a refcount block", 416 }, 417 { 418 .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, 419 .type = QEMU_OPT_BOOL, 420 .help = "Check for unintended writes into the snapshot table", 421 }, 422 { 423 .name = QCOW2_OPT_OVERLAP_INACTIVE_L1, 424 .type = QEMU_OPT_BOOL, 425 .help = "Check for unintended writes into an inactive L1 table", 426 }, 427 { 428 .name = QCOW2_OPT_OVERLAP_INACTIVE_L2, 429 .type = QEMU_OPT_BOOL, 430 .help = "Check for unintended writes into an inactive L2 table", 431 }, 432 { /* end of list */ } 433 }, 434 }; 435 436 static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { 437 [QCOW2_OL_MAIN_HEADER_BITNR] = QCOW2_OPT_OVERLAP_MAIN_HEADER, 438 [QCOW2_OL_ACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L1, 439 [QCOW2_OL_ACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_ACTIVE_L2, 440 [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE, 441 [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK, 442 [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE, 443 [QCOW2_OL_INACTIVE_L1_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L1, 444 [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, 445 }; 446 447 static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, 448 Error **errp) 449 { 450 BDRVQcowState *s = bs->opaque; 451 unsigned int len, i; 452 int ret = 0; 453 QCowHeader header; 454 QemuOpts *opts; 455 Error *local_err = NULL; 456 uint64_t ext_end; 457 uint64_t l1_vm_state_index; 458 const char *opt_overlap_check; 459 int overlap_check_template = 0; 460 461 ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); 462 if (ret < 0) { 463 error_setg_errno(errp, -ret, "Could not read qcow2 header"); 464 goto fail; 465 } 466 be32_to_cpus(&header.magic); 467 be32_to_cpus(&header.version); 468 be64_to_cpus(&header.backing_file_offset); 469 be32_to_cpus(&header.backing_file_size); 470 be64_to_cpus(&header.size); 471 be32_to_cpus(&header.cluster_bits); 472 be32_to_cpus(&header.crypt_method); 473 be64_to_cpus(&header.l1_table_offset); 474 be32_to_cpus(&header.l1_size); 475 be64_to_cpus(&header.refcount_table_offset); 476 be32_to_cpus(&header.refcount_table_clusters); 477 be64_to_cpus(&header.snapshots_offset); 478 be32_to_cpus(&header.nb_snapshots); 479 480 if (header.magic != QCOW_MAGIC) { 481 error_setg(errp, "Image is not in qcow2 format"); 482 ret = -EINVAL; 483 goto fail; 484 } 485 if (header.version < 2 || header.version > 3) { 486 report_unsupported(bs, errp, "QCOW version %d", header.version); 487 ret = -ENOTSUP; 488 goto fail; 489 } 490 491 s->qcow_version = header.version; 492 493 /* Initialise cluster size */ 494 if (header.cluster_bits < MIN_CLUSTER_BITS || 495 header.cluster_bits > MAX_CLUSTER_BITS) { 496 error_setg(errp, "Unsupported cluster size: 2^%i", header.cluster_bits); 497 ret = -EINVAL; 498 goto fail; 499 } 500 501 s->cluster_bits = header.cluster_bits; 502 s->cluster_size = 1 << s->cluster_bits; 503 s->cluster_sectors = 1 << (s->cluster_bits - 9); 504 505 /* Initialise version 3 header fields */ 506 if (header.version == 2) { 507 header.incompatible_features = 0; 508 header.compatible_features = 0; 509 header.autoclear_features = 0; 510 header.refcount_order = 4; 511 header.header_length = 72; 512 } else { 513 be64_to_cpus(&header.incompatible_features); 514 be64_to_cpus(&header.compatible_features); 515 be64_to_cpus(&header.autoclear_features); 516 be32_to_cpus(&header.refcount_order); 517 be32_to_cpus(&header.header_length); 518 519 if (header.header_length < 104) { 520 error_setg(errp, "qcow2 header too short"); 521 ret = -EINVAL; 522 goto fail; 523 } 524 } 525 526 if (header.header_length > s->cluster_size) { 527 error_setg(errp, "qcow2 header exceeds cluster size"); 528 ret = -EINVAL; 529 goto fail; 530 } 531 532 if (header.header_length > sizeof(header)) { 533 s->unknown_header_fields_size = header.header_length - sizeof(header); 534 s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); 535 ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields, 536 s->unknown_header_fields_size); 537 if (ret < 0) { 538 error_setg_errno(errp, -ret, "Could not read unknown qcow2 header " 539 "fields"); 540 goto fail; 541 } 542 } 543 544 if (header.backing_file_offset > s->cluster_size) { 545 error_setg(errp, "Invalid backing file offset"); 546 ret = -EINVAL; 547 goto fail; 548 } 549 550 if (header.backing_file_offset) { 551 ext_end = header.backing_file_offset; 552 } else { 553 ext_end = 1 << header.cluster_bits; 554 } 555 556 /* Handle feature bits */ 557 s->incompatible_features = header.incompatible_features; 558 s->compatible_features = header.compatible_features; 559 s->autoclear_features = header.autoclear_features; 560 561 if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) { 562 void *feature_table = NULL; 563 qcow2_read_extensions(bs, header.header_length, ext_end, 564 &feature_table, NULL); 565 report_unsupported_feature(bs, errp, feature_table, 566 s->incompatible_features & 567 ~QCOW2_INCOMPAT_MASK); 568 ret = -ENOTSUP; 569 g_free(feature_table); 570 goto fail; 571 } 572 573 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { 574 /* Corrupt images may not be written to unless they are being repaired 575 */ 576 if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { 577 error_setg(errp, "qcow2: Image is corrupt; cannot be opened " 578 "read/write"); 579 ret = -EACCES; 580 goto fail; 581 } 582 } 583 584 /* Check support for various header values */ 585 if (header.refcount_order != 4) { 586 report_unsupported(bs, errp, "%d bit reference counts", 587 1 << header.refcount_order); 588 ret = -ENOTSUP; 589 goto fail; 590 } 591 s->refcount_order = header.refcount_order; 592 593 if (header.crypt_method > QCOW_CRYPT_AES) { 594 error_setg(errp, "Unsupported encryption method: %i", 595 header.crypt_method); 596 ret = -EINVAL; 597 goto fail; 598 } 599 s->crypt_method_header = header.crypt_method; 600 if (s->crypt_method_header) { 601 bs->encrypted = 1; 602 } 603 604 s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ 605 s->l2_size = 1 << s->l2_bits; 606 bs->total_sectors = header.size / 512; 607 s->csize_shift = (62 - (s->cluster_bits - 8)); 608 s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; 609 s->cluster_offset_mask = (1LL << s->csize_shift) - 1; 610 611 s->refcount_table_offset = header.refcount_table_offset; 612 s->refcount_table_size = 613 header.refcount_table_clusters << (s->cluster_bits - 3); 614 615 if (header.refcount_table_clusters > qcow2_max_refcount_clusters(s)) { 616 error_setg(errp, "Reference count table too large"); 617 ret = -EINVAL; 618 goto fail; 619 } 620 621 ret = validate_table_offset(bs, s->refcount_table_offset, 622 s->refcount_table_size, sizeof(uint64_t)); 623 if (ret < 0) { 624 error_setg(errp, "Invalid reference count table offset"); 625 goto fail; 626 } 627 628 /* Snapshot table offset/length */ 629 if (header.nb_snapshots > QCOW_MAX_SNAPSHOTS) { 630 error_setg(errp, "Too many snapshots"); 631 ret = -EINVAL; 632 goto fail; 633 } 634 635 ret = validate_table_offset(bs, header.snapshots_offset, 636 header.nb_snapshots, 637 sizeof(QCowSnapshotHeader)); 638 if (ret < 0) { 639 error_setg(errp, "Invalid snapshot table offset"); 640 goto fail; 641 } 642 643 /* read the level 1 table */ 644 if (header.l1_size > QCOW_MAX_L1_SIZE) { 645 error_setg(errp, "Active L1 table too large"); 646 ret = -EFBIG; 647 goto fail; 648 } 649 s->l1_size = header.l1_size; 650 651 l1_vm_state_index = size_to_l1(s, header.size); 652 if (l1_vm_state_index > INT_MAX) { 653 error_setg(errp, "Image is too big"); 654 ret = -EFBIG; 655 goto fail; 656 } 657 s->l1_vm_state_index = l1_vm_state_index; 658 659 /* the L1 table must contain at least enough entries to put 660 header.size bytes */ 661 if (s->l1_size < s->l1_vm_state_index) { 662 error_setg(errp, "L1 table is too small"); 663 ret = -EINVAL; 664 goto fail; 665 } 666 667 ret = validate_table_offset(bs, header.l1_table_offset, 668 header.l1_size, sizeof(uint64_t)); 669 if (ret < 0) { 670 error_setg(errp, "Invalid L1 table offset"); 671 goto fail; 672 } 673 s->l1_table_offset = header.l1_table_offset; 674 675 676 if (s->l1_size > 0) { 677 s->l1_table = g_malloc0( 678 align_offset(s->l1_size * sizeof(uint64_t), 512)); 679 ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, 680 s->l1_size * sizeof(uint64_t)); 681 if (ret < 0) { 682 error_setg_errno(errp, -ret, "Could not read L1 table"); 683 goto fail; 684 } 685 for(i = 0;i < s->l1_size; i++) { 686 be64_to_cpus(&s->l1_table[i]); 687 } 688 } 689 690 /* alloc L2 table/refcount block cache */ 691 s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE); 692 s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE); 693 694 s->cluster_cache = g_malloc(s->cluster_size); 695 /* one more sector for decompressed data alignment */ 696 s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size 697 + 512); 698 s->cluster_cache_offset = -1; 699 s->flags = flags; 700 701 ret = qcow2_refcount_init(bs); 702 if (ret != 0) { 703 error_setg_errno(errp, -ret, "Could not initialize refcount handling"); 704 goto fail; 705 } 706 707 QLIST_INIT(&s->cluster_allocs); 708 QTAILQ_INIT(&s->discards); 709 710 /* read qcow2 extensions */ 711 if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL, 712 &local_err)) { 713 error_propagate(errp, local_err); 714 ret = -EINVAL; 715 goto fail; 716 } 717 718 /* read the backing file name */ 719 if (header.backing_file_offset != 0) { 720 len = header.backing_file_size; 721 if (len > MIN(1023, s->cluster_size - header.backing_file_offset)) { 722 error_setg(errp, "Backing file name too long"); 723 ret = -EINVAL; 724 goto fail; 725 } 726 ret = bdrv_pread(bs->file, header.backing_file_offset, 727 bs->backing_file, len); 728 if (ret < 0) { 729 error_setg_errno(errp, -ret, "Could not read backing file name"); 730 goto fail; 731 } 732 bs->backing_file[len] = '\0'; 733 } 734 735 /* Internal snapshots */ 736 s->snapshots_offset = header.snapshots_offset; 737 s->nb_snapshots = header.nb_snapshots; 738 739 ret = qcow2_read_snapshots(bs); 740 if (ret < 0) { 741 error_setg_errno(errp, -ret, "Could not read snapshots"); 742 goto fail; 743 } 744 745 /* Clear unknown autoclear feature bits */ 746 if (!bs->read_only && !(flags & BDRV_O_INCOMING) && s->autoclear_features) { 747 s->autoclear_features = 0; 748 ret = qcow2_update_header(bs); 749 if (ret < 0) { 750 error_setg_errno(errp, -ret, "Could not update qcow2 header"); 751 goto fail; 752 } 753 } 754 755 /* Initialise locks */ 756 qemu_co_mutex_init(&s->lock); 757 758 /* Repair image if dirty */ 759 if (!(flags & (BDRV_O_CHECK | BDRV_O_INCOMING)) && !bs->read_only && 760 (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { 761 BdrvCheckResult result = {0}; 762 763 ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS); 764 if (ret < 0) { 765 error_setg_errno(errp, -ret, "Could not repair dirty image"); 766 goto fail; 767 } 768 } 769 770 /* Enable lazy_refcounts according to image and command line options */ 771 opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort); 772 qemu_opts_absorb_qdict(opts, options, &local_err); 773 if (local_err) { 774 error_propagate(errp, local_err); 775 ret = -EINVAL; 776 goto fail; 777 } 778 779 s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS, 780 (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)); 781 782 s->discard_passthrough[QCOW2_DISCARD_NEVER] = false; 783 s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true; 784 s->discard_passthrough[QCOW2_DISCARD_REQUEST] = 785 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST, 786 flags & BDRV_O_UNMAP); 787 s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] = 788 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true); 789 s->discard_passthrough[QCOW2_DISCARD_OTHER] = 790 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); 791 792 opt_overlap_check = qemu_opt_get(opts, "overlap-check") ?: "cached"; 793 if (!strcmp(opt_overlap_check, "none")) { 794 overlap_check_template = 0; 795 } else if (!strcmp(opt_overlap_check, "constant")) { 796 overlap_check_template = QCOW2_OL_CONSTANT; 797 } else if (!strcmp(opt_overlap_check, "cached")) { 798 overlap_check_template = QCOW2_OL_CACHED; 799 } else if (!strcmp(opt_overlap_check, "all")) { 800 overlap_check_template = QCOW2_OL_ALL; 801 } else { 802 error_setg(errp, "Unsupported value '%s' for qcow2 option " 803 "'overlap-check'. Allowed are either of the following: " 804 "none, constant, cached, all", opt_overlap_check); 805 qemu_opts_del(opts); 806 ret = -EINVAL; 807 goto fail; 808 } 809 810 s->overlap_check = 0; 811 for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) { 812 /* overlap-check defines a template bitmask, but every flag may be 813 * overwritten through the associated boolean option */ 814 s->overlap_check |= 815 qemu_opt_get_bool(opts, overlap_bool_option_names[i], 816 overlap_check_template & (1 << i)) << i; 817 } 818 819 qemu_opts_del(opts); 820 821 if (s->use_lazy_refcounts && s->qcow_version < 3) { 822 error_setg(errp, "Lazy refcounts require a qcow2 image with at least " 823 "qemu 1.1 compatibility level"); 824 ret = -EINVAL; 825 goto fail; 826 } 827 828 #ifdef DEBUG_ALLOC 829 { 830 BdrvCheckResult result = {0}; 831 qcow2_check_refcounts(bs, &result, 0); 832 } 833 #endif 834 return ret; 835 836 fail: 837 g_free(s->unknown_header_fields); 838 cleanup_unknown_header_ext(bs); 839 qcow2_free_snapshots(bs); 840 qcow2_refcount_close(bs); 841 g_free(s->l1_table); 842 /* else pre-write overlap checks in cache_destroy may crash */ 843 s->l1_table = NULL; 844 if (s->l2_table_cache) { 845 qcow2_cache_destroy(bs, s->l2_table_cache); 846 } 847 if (s->refcount_block_cache) { 848 qcow2_cache_destroy(bs, s->refcount_block_cache); 849 } 850 g_free(s->cluster_cache); 851 qemu_vfree(s->cluster_data); 852 return ret; 853 } 854 855 static int qcow2_refresh_limits(BlockDriverState *bs) 856 { 857 BDRVQcowState *s = bs->opaque; 858 859 bs->bl.write_zeroes_alignment = s->cluster_sectors; 860 861 return 0; 862 } 863 864 static int qcow2_set_key(BlockDriverState *bs, const char *key) 865 { 866 BDRVQcowState *s = bs->opaque; 867 uint8_t keybuf[16]; 868 int len, i; 869 870 memset(keybuf, 0, 16); 871 len = strlen(key); 872 if (len > 16) 873 len = 16; 874 /* XXX: we could compress the chars to 7 bits to increase 875 entropy */ 876 for(i = 0;i < len;i++) { 877 keybuf[i] = key[i]; 878 } 879 s->crypt_method = s->crypt_method_header; 880 881 if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) 882 return -1; 883 if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) 884 return -1; 885 #if 0 886 /* test */ 887 { 888 uint8_t in[16]; 889 uint8_t out[16]; 890 uint8_t tmp[16]; 891 for(i=0;i<16;i++) 892 in[i] = i; 893 AES_encrypt(in, tmp, &s->aes_encrypt_key); 894 AES_decrypt(tmp, out, &s->aes_decrypt_key); 895 for(i = 0; i < 16; i++) 896 printf(" %02x", tmp[i]); 897 printf("\n"); 898 for(i = 0; i < 16; i++) 899 printf(" %02x", out[i]); 900 printf("\n"); 901 } 902 #endif 903 return 0; 904 } 905 906 /* We have no actual commit/abort logic for qcow2, but we need to write out any 907 * unwritten data if we reopen read-only. */ 908 static int qcow2_reopen_prepare(BDRVReopenState *state, 909 BlockReopenQueue *queue, Error **errp) 910 { 911 int ret; 912 913 if ((state->flags & BDRV_O_RDWR) == 0) { 914 ret = bdrv_flush(state->bs); 915 if (ret < 0) { 916 return ret; 917 } 918 919 ret = qcow2_mark_clean(state->bs); 920 if (ret < 0) { 921 return ret; 922 } 923 } 924 925 return 0; 926 } 927 928 static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, 929 int64_t sector_num, int nb_sectors, int *pnum) 930 { 931 BDRVQcowState *s = bs->opaque; 932 uint64_t cluster_offset; 933 int index_in_cluster, ret; 934 int64_t status = 0; 935 936 *pnum = nb_sectors; 937 qemu_co_mutex_lock(&s->lock); 938 ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset); 939 qemu_co_mutex_unlock(&s->lock); 940 if (ret < 0) { 941 return ret; 942 } 943 944 if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED && 945 !s->crypt_method) { 946 index_in_cluster = sector_num & (s->cluster_sectors - 1); 947 cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); 948 status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset; 949 } 950 if (ret == QCOW2_CLUSTER_ZERO) { 951 status |= BDRV_BLOCK_ZERO; 952 } else if (ret != QCOW2_CLUSTER_UNALLOCATED) { 953 status |= BDRV_BLOCK_DATA; 954 } 955 return status; 956 } 957 958 /* handle reading after the end of the backing file */ 959 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, 960 int64_t sector_num, int nb_sectors) 961 { 962 int n1; 963 if ((sector_num + nb_sectors) <= bs->total_sectors) 964 return nb_sectors; 965 if (sector_num >= bs->total_sectors) 966 n1 = 0; 967 else 968 n1 = bs->total_sectors - sector_num; 969 970 qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1)); 971 972 return n1; 973 } 974 975 static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, 976 int remaining_sectors, QEMUIOVector *qiov) 977 { 978 BDRVQcowState *s = bs->opaque; 979 int index_in_cluster, n1; 980 int ret; 981 int cur_nr_sectors; /* number of sectors in current iteration */ 982 uint64_t cluster_offset = 0; 983 uint64_t bytes_done = 0; 984 QEMUIOVector hd_qiov; 985 uint8_t *cluster_data = NULL; 986 987 qemu_iovec_init(&hd_qiov, qiov->niov); 988 989 qemu_co_mutex_lock(&s->lock); 990 991 while (remaining_sectors != 0) { 992 993 /* prepare next request */ 994 cur_nr_sectors = remaining_sectors; 995 if (s->crypt_method) { 996 cur_nr_sectors = MIN(cur_nr_sectors, 997 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); 998 } 999 1000 ret = qcow2_get_cluster_offset(bs, sector_num << 9, 1001 &cur_nr_sectors, &cluster_offset); 1002 if (ret < 0) { 1003 goto fail; 1004 } 1005 1006 index_in_cluster = sector_num & (s->cluster_sectors - 1); 1007 1008 qemu_iovec_reset(&hd_qiov); 1009 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, 1010 cur_nr_sectors * 512); 1011 1012 switch (ret) { 1013 case QCOW2_CLUSTER_UNALLOCATED: 1014 1015 if (bs->backing_hd) { 1016 /* read from the base image */ 1017 n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov, 1018 sector_num, cur_nr_sectors); 1019 if (n1 > 0) { 1020 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); 1021 qemu_co_mutex_unlock(&s->lock); 1022 ret = bdrv_co_readv(bs->backing_hd, sector_num, 1023 n1, &hd_qiov); 1024 qemu_co_mutex_lock(&s->lock); 1025 if (ret < 0) { 1026 goto fail; 1027 } 1028 } 1029 } else { 1030 /* Note: in this case, no need to wait */ 1031 qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); 1032 } 1033 break; 1034 1035 case QCOW2_CLUSTER_ZERO: 1036 qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); 1037 break; 1038 1039 case QCOW2_CLUSTER_COMPRESSED: 1040 /* add AIO support for compressed blocks ? */ 1041 ret = qcow2_decompress_cluster(bs, cluster_offset); 1042 if (ret < 0) { 1043 goto fail; 1044 } 1045 1046 qemu_iovec_from_buf(&hd_qiov, 0, 1047 s->cluster_cache + index_in_cluster * 512, 1048 512 * cur_nr_sectors); 1049 break; 1050 1051 case QCOW2_CLUSTER_NORMAL: 1052 if ((cluster_offset & 511) != 0) { 1053 ret = -EIO; 1054 goto fail; 1055 } 1056 1057 if (s->crypt_method) { 1058 /* 1059 * For encrypted images, read everything into a temporary 1060 * contiguous buffer on which the AES functions can work. 1061 */ 1062 if (!cluster_data) { 1063 cluster_data = 1064 qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 1065 } 1066 1067 assert(cur_nr_sectors <= 1068 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); 1069 qemu_iovec_reset(&hd_qiov); 1070 qemu_iovec_add(&hd_qiov, cluster_data, 1071 512 * cur_nr_sectors); 1072 } 1073 1074 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); 1075 qemu_co_mutex_unlock(&s->lock); 1076 ret = bdrv_co_readv(bs->file, 1077 (cluster_offset >> 9) + index_in_cluster, 1078 cur_nr_sectors, &hd_qiov); 1079 qemu_co_mutex_lock(&s->lock); 1080 if (ret < 0) { 1081 goto fail; 1082 } 1083 if (s->crypt_method) { 1084 qcow2_encrypt_sectors(s, sector_num, cluster_data, 1085 cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key); 1086 qemu_iovec_from_buf(qiov, bytes_done, 1087 cluster_data, 512 * cur_nr_sectors); 1088 } 1089 break; 1090 1091 default: 1092 g_assert_not_reached(); 1093 ret = -EIO; 1094 goto fail; 1095 } 1096 1097 remaining_sectors -= cur_nr_sectors; 1098 sector_num += cur_nr_sectors; 1099 bytes_done += cur_nr_sectors * 512; 1100 } 1101 ret = 0; 1102 1103 fail: 1104 qemu_co_mutex_unlock(&s->lock); 1105 1106 qemu_iovec_destroy(&hd_qiov); 1107 qemu_vfree(cluster_data); 1108 1109 return ret; 1110 } 1111 1112 static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, 1113 int64_t sector_num, 1114 int remaining_sectors, 1115 QEMUIOVector *qiov) 1116 { 1117 BDRVQcowState *s = bs->opaque; 1118 int index_in_cluster; 1119 int ret; 1120 int cur_nr_sectors; /* number of sectors in current iteration */ 1121 uint64_t cluster_offset; 1122 QEMUIOVector hd_qiov; 1123 uint64_t bytes_done = 0; 1124 uint8_t *cluster_data = NULL; 1125 QCowL2Meta *l2meta = NULL; 1126 1127 trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, 1128 remaining_sectors); 1129 1130 qemu_iovec_init(&hd_qiov, qiov->niov); 1131 1132 s->cluster_cache_offset = -1; /* disable compressed cache */ 1133 1134 qemu_co_mutex_lock(&s->lock); 1135 1136 while (remaining_sectors != 0) { 1137 1138 l2meta = NULL; 1139 1140 trace_qcow2_writev_start_part(qemu_coroutine_self()); 1141 index_in_cluster = sector_num & (s->cluster_sectors - 1); 1142 cur_nr_sectors = remaining_sectors; 1143 if (s->crypt_method && 1144 cur_nr_sectors > 1145 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) { 1146 cur_nr_sectors = 1147 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster; 1148 } 1149 1150 ret = qcow2_alloc_cluster_offset(bs, sector_num << 9, 1151 &cur_nr_sectors, &cluster_offset, &l2meta); 1152 if (ret < 0) { 1153 goto fail; 1154 } 1155 1156 assert((cluster_offset & 511) == 0); 1157 1158 qemu_iovec_reset(&hd_qiov); 1159 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, 1160 cur_nr_sectors * 512); 1161 1162 if (s->crypt_method) { 1163 if (!cluster_data) { 1164 cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * 1165 s->cluster_size); 1166 } 1167 1168 assert(hd_qiov.size <= 1169 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); 1170 qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); 1171 1172 qcow2_encrypt_sectors(s, sector_num, cluster_data, 1173 cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key); 1174 1175 qemu_iovec_reset(&hd_qiov); 1176 qemu_iovec_add(&hd_qiov, cluster_data, 1177 cur_nr_sectors * 512); 1178 } 1179 1180 ret = qcow2_pre_write_overlap_check(bs, 0, 1181 cluster_offset + index_in_cluster * BDRV_SECTOR_SIZE, 1182 cur_nr_sectors * BDRV_SECTOR_SIZE); 1183 if (ret < 0) { 1184 goto fail; 1185 } 1186 1187 qemu_co_mutex_unlock(&s->lock); 1188 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); 1189 trace_qcow2_writev_data(qemu_coroutine_self(), 1190 (cluster_offset >> 9) + index_in_cluster); 1191 ret = bdrv_co_writev(bs->file, 1192 (cluster_offset >> 9) + index_in_cluster, 1193 cur_nr_sectors, &hd_qiov); 1194 qemu_co_mutex_lock(&s->lock); 1195 if (ret < 0) { 1196 goto fail; 1197 } 1198 1199 while (l2meta != NULL) { 1200 QCowL2Meta *next; 1201 1202 ret = qcow2_alloc_cluster_link_l2(bs, l2meta); 1203 if (ret < 0) { 1204 goto fail; 1205 } 1206 1207 /* Take the request off the list of running requests */ 1208 if (l2meta->nb_clusters != 0) { 1209 QLIST_REMOVE(l2meta, next_in_flight); 1210 } 1211 1212 qemu_co_queue_restart_all(&l2meta->dependent_requests); 1213 1214 next = l2meta->next; 1215 g_free(l2meta); 1216 l2meta = next; 1217 } 1218 1219 remaining_sectors -= cur_nr_sectors; 1220 sector_num += cur_nr_sectors; 1221 bytes_done += cur_nr_sectors * 512; 1222 trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors); 1223 } 1224 ret = 0; 1225 1226 fail: 1227 qemu_co_mutex_unlock(&s->lock); 1228 1229 while (l2meta != NULL) { 1230 QCowL2Meta *next; 1231 1232 if (l2meta->nb_clusters != 0) { 1233 QLIST_REMOVE(l2meta, next_in_flight); 1234 } 1235 qemu_co_queue_restart_all(&l2meta->dependent_requests); 1236 1237 next = l2meta->next; 1238 g_free(l2meta); 1239 l2meta = next; 1240 } 1241 1242 qemu_iovec_destroy(&hd_qiov); 1243 qemu_vfree(cluster_data); 1244 trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); 1245 1246 return ret; 1247 } 1248 1249 static void qcow2_close(BlockDriverState *bs) 1250 { 1251 BDRVQcowState *s = bs->opaque; 1252 g_free(s->l1_table); 1253 /* else pre-write overlap checks in cache_destroy may crash */ 1254 s->l1_table = NULL; 1255 1256 if (!(bs->open_flags & BDRV_O_INCOMING)) { 1257 qcow2_cache_flush(bs, s->l2_table_cache); 1258 qcow2_cache_flush(bs, s->refcount_block_cache); 1259 1260 qcow2_mark_clean(bs); 1261 } 1262 1263 qcow2_cache_destroy(bs, s->l2_table_cache); 1264 qcow2_cache_destroy(bs, s->refcount_block_cache); 1265 1266 g_free(s->unknown_header_fields); 1267 cleanup_unknown_header_ext(bs); 1268 1269 g_free(s->cluster_cache); 1270 qemu_vfree(s->cluster_data); 1271 qcow2_refcount_close(bs); 1272 qcow2_free_snapshots(bs); 1273 } 1274 1275 static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) 1276 { 1277 BDRVQcowState *s = bs->opaque; 1278 int flags = s->flags; 1279 AES_KEY aes_encrypt_key; 1280 AES_KEY aes_decrypt_key; 1281 uint32_t crypt_method = 0; 1282 QDict *options; 1283 Error *local_err = NULL; 1284 int ret; 1285 1286 /* 1287 * Backing files are read-only which makes all of their metadata immutable, 1288 * that means we don't have to worry about reopening them here. 1289 */ 1290 1291 if (s->crypt_method) { 1292 crypt_method = s->crypt_method; 1293 memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key)); 1294 memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key)); 1295 } 1296 1297 qcow2_close(bs); 1298 1299 bdrv_invalidate_cache(bs->file, &local_err); 1300 if (local_err) { 1301 error_propagate(errp, local_err); 1302 return; 1303 } 1304 1305 memset(s, 0, sizeof(BDRVQcowState)); 1306 options = qdict_clone_shallow(bs->options); 1307 1308 ret = qcow2_open(bs, options, flags, &local_err); 1309 if (local_err) { 1310 error_setg(errp, "Could not reopen qcow2 layer: %s", 1311 error_get_pretty(local_err)); 1312 error_free(local_err); 1313 return; 1314 } else if (ret < 0) { 1315 error_setg_errno(errp, -ret, "Could not reopen qcow2 layer"); 1316 return; 1317 } 1318 1319 QDECREF(options); 1320 1321 if (crypt_method) { 1322 s->crypt_method = crypt_method; 1323 memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key)); 1324 memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key)); 1325 } 1326 } 1327 1328 static size_t header_ext_add(char *buf, uint32_t magic, const void *s, 1329 size_t len, size_t buflen) 1330 { 1331 QCowExtension *ext_backing_fmt = (QCowExtension*) buf; 1332 size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7); 1333 1334 if (buflen < ext_len) { 1335 return -ENOSPC; 1336 } 1337 1338 *ext_backing_fmt = (QCowExtension) { 1339 .magic = cpu_to_be32(magic), 1340 .len = cpu_to_be32(len), 1341 }; 1342 memcpy(buf + sizeof(QCowExtension), s, len); 1343 1344 return ext_len; 1345 } 1346 1347 /* 1348 * Updates the qcow2 header, including the variable length parts of it, i.e. 1349 * the backing file name and all extensions. qcow2 was not designed to allow 1350 * such changes, so if we run out of space (we can only use the first cluster) 1351 * this function may fail. 1352 * 1353 * Returns 0 on success, -errno in error cases. 1354 */ 1355 int qcow2_update_header(BlockDriverState *bs) 1356 { 1357 BDRVQcowState *s = bs->opaque; 1358 QCowHeader *header; 1359 char *buf; 1360 size_t buflen = s->cluster_size; 1361 int ret; 1362 uint64_t total_size; 1363 uint32_t refcount_table_clusters; 1364 size_t header_length; 1365 Qcow2UnknownHeaderExtension *uext; 1366 1367 buf = qemu_blockalign(bs, buflen); 1368 1369 /* Header structure */ 1370 header = (QCowHeader*) buf; 1371 1372 if (buflen < sizeof(*header)) { 1373 ret = -ENOSPC; 1374 goto fail; 1375 } 1376 1377 header_length = sizeof(*header) + s->unknown_header_fields_size; 1378 total_size = bs->total_sectors * BDRV_SECTOR_SIZE; 1379 refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); 1380 1381 *header = (QCowHeader) { 1382 /* Version 2 fields */ 1383 .magic = cpu_to_be32(QCOW_MAGIC), 1384 .version = cpu_to_be32(s->qcow_version), 1385 .backing_file_offset = 0, 1386 .backing_file_size = 0, 1387 .cluster_bits = cpu_to_be32(s->cluster_bits), 1388 .size = cpu_to_be64(total_size), 1389 .crypt_method = cpu_to_be32(s->crypt_method_header), 1390 .l1_size = cpu_to_be32(s->l1_size), 1391 .l1_table_offset = cpu_to_be64(s->l1_table_offset), 1392 .refcount_table_offset = cpu_to_be64(s->refcount_table_offset), 1393 .refcount_table_clusters = cpu_to_be32(refcount_table_clusters), 1394 .nb_snapshots = cpu_to_be32(s->nb_snapshots), 1395 .snapshots_offset = cpu_to_be64(s->snapshots_offset), 1396 1397 /* Version 3 fields */ 1398 .incompatible_features = cpu_to_be64(s->incompatible_features), 1399 .compatible_features = cpu_to_be64(s->compatible_features), 1400 .autoclear_features = cpu_to_be64(s->autoclear_features), 1401 .refcount_order = cpu_to_be32(s->refcount_order), 1402 .header_length = cpu_to_be32(header_length), 1403 }; 1404 1405 /* For older versions, write a shorter header */ 1406 switch (s->qcow_version) { 1407 case 2: 1408 ret = offsetof(QCowHeader, incompatible_features); 1409 break; 1410 case 3: 1411 ret = sizeof(*header); 1412 break; 1413 default: 1414 ret = -EINVAL; 1415 goto fail; 1416 } 1417 1418 buf += ret; 1419 buflen -= ret; 1420 memset(buf, 0, buflen); 1421 1422 /* Preserve any unknown field in the header */ 1423 if (s->unknown_header_fields_size) { 1424 if (buflen < s->unknown_header_fields_size) { 1425 ret = -ENOSPC; 1426 goto fail; 1427 } 1428 1429 memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size); 1430 buf += s->unknown_header_fields_size; 1431 buflen -= s->unknown_header_fields_size; 1432 } 1433 1434 /* Backing file format header extension */ 1435 if (*bs->backing_format) { 1436 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, 1437 bs->backing_format, strlen(bs->backing_format), 1438 buflen); 1439 if (ret < 0) { 1440 goto fail; 1441 } 1442 1443 buf += ret; 1444 buflen -= ret; 1445 } 1446 1447 /* Feature table */ 1448 Qcow2Feature features[] = { 1449 { 1450 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 1451 .bit = QCOW2_INCOMPAT_DIRTY_BITNR, 1452 .name = "dirty bit", 1453 }, 1454 { 1455 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 1456 .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, 1457 .name = "corrupt bit", 1458 }, 1459 { 1460 .type = QCOW2_FEAT_TYPE_COMPATIBLE, 1461 .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, 1462 .name = "lazy refcounts", 1463 }, 1464 }; 1465 1466 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, 1467 features, sizeof(features), buflen); 1468 if (ret < 0) { 1469 goto fail; 1470 } 1471 buf += ret; 1472 buflen -= ret; 1473 1474 /* Keep unknown header extensions */ 1475 QLIST_FOREACH(uext, &s->unknown_header_ext, next) { 1476 ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); 1477 if (ret < 0) { 1478 goto fail; 1479 } 1480 1481 buf += ret; 1482 buflen -= ret; 1483 } 1484 1485 /* End of header extensions */ 1486 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen); 1487 if (ret < 0) { 1488 goto fail; 1489 } 1490 1491 buf += ret; 1492 buflen -= ret; 1493 1494 /* Backing file name */ 1495 if (*bs->backing_file) { 1496 size_t backing_file_len = strlen(bs->backing_file); 1497 1498 if (buflen < backing_file_len) { 1499 ret = -ENOSPC; 1500 goto fail; 1501 } 1502 1503 /* Using strncpy is ok here, since buf is not NUL-terminated. */ 1504 strncpy(buf, bs->backing_file, buflen); 1505 1506 header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); 1507 header->backing_file_size = cpu_to_be32(backing_file_len); 1508 } 1509 1510 /* Write the new header */ 1511 ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size); 1512 if (ret < 0) { 1513 goto fail; 1514 } 1515 1516 ret = 0; 1517 fail: 1518 qemu_vfree(header); 1519 return ret; 1520 } 1521 1522 static int qcow2_change_backing_file(BlockDriverState *bs, 1523 const char *backing_file, const char *backing_fmt) 1524 { 1525 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 1526 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 1527 1528 return qcow2_update_header(bs); 1529 } 1530 1531 static int preallocate(BlockDriverState *bs) 1532 { 1533 uint64_t nb_sectors; 1534 uint64_t offset; 1535 uint64_t host_offset = 0; 1536 int num; 1537 int ret; 1538 QCowL2Meta *meta; 1539 1540 nb_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; 1541 offset = 0; 1542 1543 while (nb_sectors) { 1544 num = MIN(nb_sectors, INT_MAX >> BDRV_SECTOR_BITS); 1545 ret = qcow2_alloc_cluster_offset(bs, offset, &num, 1546 &host_offset, &meta); 1547 if (ret < 0) { 1548 return ret; 1549 } 1550 1551 while (meta) { 1552 QCowL2Meta *next = meta->next; 1553 1554 ret = qcow2_alloc_cluster_link_l2(bs, meta); 1555 if (ret < 0) { 1556 qcow2_free_any_clusters(bs, meta->alloc_offset, 1557 meta->nb_clusters, QCOW2_DISCARD_NEVER); 1558 return ret; 1559 } 1560 1561 /* There are no dependent requests, but we need to remove our 1562 * request from the list of in-flight requests */ 1563 QLIST_REMOVE(meta, next_in_flight); 1564 1565 g_free(meta); 1566 meta = next; 1567 } 1568 1569 /* TODO Preallocate data if requested */ 1570 1571 nb_sectors -= num; 1572 offset += num << BDRV_SECTOR_BITS; 1573 } 1574 1575 /* 1576 * It is expected that the image file is large enough to actually contain 1577 * all of the allocated clusters (otherwise we get failing reads after 1578 * EOF). Extend the image to the last allocated sector. 1579 */ 1580 if (host_offset != 0) { 1581 uint8_t buf[BDRV_SECTOR_SIZE]; 1582 memset(buf, 0, BDRV_SECTOR_SIZE); 1583 ret = bdrv_write(bs->file, (host_offset >> BDRV_SECTOR_BITS) + num - 1, 1584 buf, 1); 1585 if (ret < 0) { 1586 return ret; 1587 } 1588 } 1589 1590 return 0; 1591 } 1592 1593 static int qcow2_create2(const char *filename, int64_t total_size, 1594 const char *backing_file, const char *backing_format, 1595 int flags, size_t cluster_size, int prealloc, 1596 QEMUOptionParameter *options, int version, 1597 Error **errp) 1598 { 1599 /* Calculate cluster_bits */ 1600 int cluster_bits; 1601 cluster_bits = ffs(cluster_size) - 1; 1602 if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || 1603 (1 << cluster_bits) != cluster_size) 1604 { 1605 error_setg(errp, "Cluster size must be a power of two between %d and " 1606 "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); 1607 return -EINVAL; 1608 } 1609 1610 /* 1611 * Open the image file and write a minimal qcow2 header. 1612 * 1613 * We keep things simple and start with a zero-sized image. We also 1614 * do without refcount blocks or a L1 table for now. We'll fix the 1615 * inconsistency later. 1616 * 1617 * We do need a refcount table because growing the refcount table means 1618 * allocating two new refcount blocks - the seconds of which would be at 1619 * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file 1620 * size for any qcow2 image. 1621 */ 1622 BlockDriverState* bs; 1623 QCowHeader *header; 1624 uint64_t* refcount_table; 1625 Error *local_err = NULL; 1626 int ret; 1627 1628 ret = bdrv_create_file(filename, options, &local_err); 1629 if (ret < 0) { 1630 error_propagate(errp, local_err); 1631 return ret; 1632 } 1633 1634 bs = NULL; 1635 ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, 1636 NULL, &local_err); 1637 if (ret < 0) { 1638 error_propagate(errp, local_err); 1639 return ret; 1640 } 1641 1642 /* Write the header */ 1643 QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); 1644 header = g_malloc0(cluster_size); 1645 *header = (QCowHeader) { 1646 .magic = cpu_to_be32(QCOW_MAGIC), 1647 .version = cpu_to_be32(version), 1648 .cluster_bits = cpu_to_be32(cluster_bits), 1649 .size = cpu_to_be64(0), 1650 .l1_table_offset = cpu_to_be64(0), 1651 .l1_size = cpu_to_be32(0), 1652 .refcount_table_offset = cpu_to_be64(cluster_size), 1653 .refcount_table_clusters = cpu_to_be32(1), 1654 .refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT), 1655 .header_length = cpu_to_be32(sizeof(*header)), 1656 }; 1657 1658 if (flags & BLOCK_FLAG_ENCRYPT) { 1659 header->crypt_method = cpu_to_be32(QCOW_CRYPT_AES); 1660 } else { 1661 header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 1662 } 1663 1664 if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) { 1665 header->compatible_features |= 1666 cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); 1667 } 1668 1669 ret = bdrv_pwrite(bs, 0, header, cluster_size); 1670 g_free(header); 1671 if (ret < 0) { 1672 error_setg_errno(errp, -ret, "Could not write qcow2 header"); 1673 goto out; 1674 } 1675 1676 /* Write a refcount table with one refcount block */ 1677 refcount_table = g_malloc0(2 * cluster_size); 1678 refcount_table[0] = cpu_to_be64(2 * cluster_size); 1679 ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size); 1680 g_free(refcount_table); 1681 1682 if (ret < 0) { 1683 error_setg_errno(errp, -ret, "Could not write refcount table"); 1684 goto out; 1685 } 1686 1687 bdrv_unref(bs); 1688 bs = NULL; 1689 1690 /* 1691 * And now open the image and make it consistent first (i.e. increase the 1692 * refcount of the cluster that is occupied by the header and the refcount 1693 * table) 1694 */ 1695 BlockDriver* drv = bdrv_find_format("qcow2"); 1696 assert(drv != NULL); 1697 ret = bdrv_open(&bs, filename, NULL, NULL, 1698 BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv, &local_err); 1699 if (ret < 0) { 1700 error_propagate(errp, local_err); 1701 goto out; 1702 } 1703 1704 ret = qcow2_alloc_clusters(bs, 3 * cluster_size); 1705 if (ret < 0) { 1706 error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " 1707 "header and refcount table"); 1708 goto out; 1709 1710 } else if (ret != 0) { 1711 error_report("Huh, first cluster in empty image is already in use?"); 1712 abort(); 1713 } 1714 1715 /* Okay, now that we have a valid image, let's give it the right size */ 1716 ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE); 1717 if (ret < 0) { 1718 error_setg_errno(errp, -ret, "Could not resize image"); 1719 goto out; 1720 } 1721 1722 /* Want a backing file? There you go.*/ 1723 if (backing_file) { 1724 ret = bdrv_change_backing_file(bs, backing_file, backing_format); 1725 if (ret < 0) { 1726 error_setg_errno(errp, -ret, "Could not assign backing file '%s' " 1727 "with format '%s'", backing_file, backing_format); 1728 goto out; 1729 } 1730 } 1731 1732 /* And if we're supposed to preallocate metadata, do that now */ 1733 if (prealloc) { 1734 BDRVQcowState *s = bs->opaque; 1735 qemu_co_mutex_lock(&s->lock); 1736 ret = preallocate(bs); 1737 qemu_co_mutex_unlock(&s->lock); 1738 if (ret < 0) { 1739 error_setg_errno(errp, -ret, "Could not preallocate metadata"); 1740 goto out; 1741 } 1742 } 1743 1744 bdrv_unref(bs); 1745 bs = NULL; 1746 1747 /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ 1748 ret = bdrv_open(&bs, filename, NULL, NULL, 1749 BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING, 1750 drv, &local_err); 1751 if (local_err) { 1752 error_propagate(errp, local_err); 1753 goto out; 1754 } 1755 1756 ret = 0; 1757 out: 1758 if (bs) { 1759 bdrv_unref(bs); 1760 } 1761 return ret; 1762 } 1763 1764 static int qcow2_create(const char *filename, QEMUOptionParameter *options, 1765 Error **errp) 1766 { 1767 const char *backing_file = NULL; 1768 const char *backing_fmt = NULL; 1769 uint64_t sectors = 0; 1770 int flags = 0; 1771 size_t cluster_size = DEFAULT_CLUSTER_SIZE; 1772 int prealloc = 0; 1773 int version = 3; 1774 Error *local_err = NULL; 1775 int ret; 1776 1777 /* Read out options */ 1778 while (options && options->name) { 1779 if (!strcmp(options->name, BLOCK_OPT_SIZE)) { 1780 sectors = options->value.n / 512; 1781 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { 1782 backing_file = options->value.s; 1783 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) { 1784 backing_fmt = options->value.s; 1785 } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) { 1786 flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0; 1787 } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) { 1788 if (options->value.n) { 1789 cluster_size = options->value.n; 1790 } 1791 } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { 1792 if (!options->value.s || !strcmp(options->value.s, "off")) { 1793 prealloc = 0; 1794 } else if (!strcmp(options->value.s, "metadata")) { 1795 prealloc = 1; 1796 } else { 1797 error_setg(errp, "Invalid preallocation mode: '%s'", 1798 options->value.s); 1799 return -EINVAL; 1800 } 1801 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) { 1802 if (!options->value.s) { 1803 /* keep the default */ 1804 } else if (!strcmp(options->value.s, "0.10")) { 1805 version = 2; 1806 } else if (!strcmp(options->value.s, "1.1")) { 1807 version = 3; 1808 } else { 1809 error_setg(errp, "Invalid compatibility level: '%s'", 1810 options->value.s); 1811 return -EINVAL; 1812 } 1813 } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) { 1814 flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0; 1815 } 1816 options++; 1817 } 1818 1819 if (backing_file && prealloc) { 1820 error_setg(errp, "Backing file and preallocation cannot be used at " 1821 "the same time"); 1822 return -EINVAL; 1823 } 1824 1825 if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) { 1826 error_setg(errp, "Lazy refcounts only supported with compatibility " 1827 "level 1.1 and above (use compat=1.1 or greater)"); 1828 return -EINVAL; 1829 } 1830 1831 ret = qcow2_create2(filename, sectors, backing_file, backing_fmt, flags, 1832 cluster_size, prealloc, options, version, &local_err); 1833 if (local_err) { 1834 error_propagate(errp, local_err); 1835 } 1836 return ret; 1837 } 1838 1839 static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs, 1840 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) 1841 { 1842 int ret; 1843 BDRVQcowState *s = bs->opaque; 1844 1845 /* Emulate misaligned zero writes */ 1846 if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) { 1847 return -ENOTSUP; 1848 } 1849 1850 /* Whatever is left can use real zero clusters */ 1851 qemu_co_mutex_lock(&s->lock); 1852 ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS, 1853 nb_sectors); 1854 qemu_co_mutex_unlock(&s->lock); 1855 1856 return ret; 1857 } 1858 1859 static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, 1860 int64_t sector_num, int nb_sectors) 1861 { 1862 int ret; 1863 BDRVQcowState *s = bs->opaque; 1864 1865 qemu_co_mutex_lock(&s->lock); 1866 ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, 1867 nb_sectors, QCOW2_DISCARD_REQUEST); 1868 qemu_co_mutex_unlock(&s->lock); 1869 return ret; 1870 } 1871 1872 static int qcow2_truncate(BlockDriverState *bs, int64_t offset) 1873 { 1874 BDRVQcowState *s = bs->opaque; 1875 int64_t new_l1_size; 1876 int ret; 1877 1878 if (offset & 511) { 1879 error_report("The new size must be a multiple of 512"); 1880 return -EINVAL; 1881 } 1882 1883 /* cannot proceed if image has snapshots */ 1884 if (s->nb_snapshots) { 1885 error_report("Can't resize an image which has snapshots"); 1886 return -ENOTSUP; 1887 } 1888 1889 /* shrinking is currently not supported */ 1890 if (offset < bs->total_sectors * 512) { 1891 error_report("qcow2 doesn't support shrinking images yet"); 1892 return -ENOTSUP; 1893 } 1894 1895 new_l1_size = size_to_l1(s, offset); 1896 ret = qcow2_grow_l1_table(bs, new_l1_size, true); 1897 if (ret < 0) { 1898 return ret; 1899 } 1900 1901 /* write updated header.size */ 1902 offset = cpu_to_be64(offset); 1903 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), 1904 &offset, sizeof(uint64_t)); 1905 if (ret < 0) { 1906 return ret; 1907 } 1908 1909 s->l1_vm_state_index = new_l1_size; 1910 return 0; 1911 } 1912 1913 /* XXX: put compressed sectors first, then all the cluster aligned 1914 tables to avoid losing bytes in alignment */ 1915 static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, 1916 const uint8_t *buf, int nb_sectors) 1917 { 1918 BDRVQcowState *s = bs->opaque; 1919 z_stream strm; 1920 int ret, out_len; 1921 uint8_t *out_buf; 1922 uint64_t cluster_offset; 1923 1924 if (nb_sectors == 0) { 1925 /* align end of file to a sector boundary to ease reading with 1926 sector based I/Os */ 1927 cluster_offset = bdrv_getlength(bs->file); 1928 cluster_offset = (cluster_offset + 511) & ~511; 1929 bdrv_truncate(bs->file, cluster_offset); 1930 return 0; 1931 } 1932 1933 if (nb_sectors != s->cluster_sectors) { 1934 ret = -EINVAL; 1935 1936 /* Zero-pad last write if image size is not cluster aligned */ 1937 if (sector_num + nb_sectors == bs->total_sectors && 1938 nb_sectors < s->cluster_sectors) { 1939 uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size); 1940 memset(pad_buf, 0, s->cluster_size); 1941 memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE); 1942 ret = qcow2_write_compressed(bs, sector_num, 1943 pad_buf, s->cluster_sectors); 1944 qemu_vfree(pad_buf); 1945 } 1946 return ret; 1947 } 1948 1949 out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); 1950 1951 /* best compression, small window, no zlib header */ 1952 memset(&strm, 0, sizeof(strm)); 1953 ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, 1954 Z_DEFLATED, -12, 1955 9, Z_DEFAULT_STRATEGY); 1956 if (ret != 0) { 1957 ret = -EINVAL; 1958 goto fail; 1959 } 1960 1961 strm.avail_in = s->cluster_size; 1962 strm.next_in = (uint8_t *)buf; 1963 strm.avail_out = s->cluster_size; 1964 strm.next_out = out_buf; 1965 1966 ret = deflate(&strm, Z_FINISH); 1967 if (ret != Z_STREAM_END && ret != Z_OK) { 1968 deflateEnd(&strm); 1969 ret = -EINVAL; 1970 goto fail; 1971 } 1972 out_len = strm.next_out - out_buf; 1973 1974 deflateEnd(&strm); 1975 1976 if (ret != Z_STREAM_END || out_len >= s->cluster_size) { 1977 /* could not compress: write normal cluster */ 1978 ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors); 1979 if (ret < 0) { 1980 goto fail; 1981 } 1982 } else { 1983 cluster_offset = qcow2_alloc_compressed_cluster_offset(bs, 1984 sector_num << 9, out_len); 1985 if (!cluster_offset) { 1986 ret = -EIO; 1987 goto fail; 1988 } 1989 cluster_offset &= s->cluster_offset_mask; 1990 1991 ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len); 1992 if (ret < 0) { 1993 goto fail; 1994 } 1995 1996 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); 1997 ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len); 1998 if (ret < 0) { 1999 goto fail; 2000 } 2001 } 2002 2003 ret = 0; 2004 fail: 2005 g_free(out_buf); 2006 return ret; 2007 } 2008 2009 static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) 2010 { 2011 BDRVQcowState *s = bs->opaque; 2012 int ret; 2013 2014 qemu_co_mutex_lock(&s->lock); 2015 ret = qcow2_cache_flush(bs, s->l2_table_cache); 2016 if (ret < 0) { 2017 qemu_co_mutex_unlock(&s->lock); 2018 return ret; 2019 } 2020 2021 if (qcow2_need_accurate_refcounts(s)) { 2022 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 2023 if (ret < 0) { 2024 qemu_co_mutex_unlock(&s->lock); 2025 return ret; 2026 } 2027 } 2028 qemu_co_mutex_unlock(&s->lock); 2029 2030 return 0; 2031 } 2032 2033 static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 2034 { 2035 BDRVQcowState *s = bs->opaque; 2036 bdi->unallocated_blocks_are_zero = true; 2037 bdi->can_write_zeroes_with_unmap = (s->qcow_version >= 3); 2038 bdi->cluster_size = s->cluster_size; 2039 bdi->vm_state_offset = qcow2_vm_state_offset(s); 2040 return 0; 2041 } 2042 2043 static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) 2044 { 2045 BDRVQcowState *s = bs->opaque; 2046 ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1); 2047 2048 *spec_info = (ImageInfoSpecific){ 2049 .kind = IMAGE_INFO_SPECIFIC_KIND_QCOW2, 2050 { 2051 .qcow2 = g_new(ImageInfoSpecificQCow2, 1), 2052 }, 2053 }; 2054 if (s->qcow_version == 2) { 2055 *spec_info->qcow2 = (ImageInfoSpecificQCow2){ 2056 .compat = g_strdup("0.10"), 2057 }; 2058 } else if (s->qcow_version == 3) { 2059 *spec_info->qcow2 = (ImageInfoSpecificQCow2){ 2060 .compat = g_strdup("1.1"), 2061 .lazy_refcounts = s->compatible_features & 2062 QCOW2_COMPAT_LAZY_REFCOUNTS, 2063 .has_lazy_refcounts = true, 2064 }; 2065 } 2066 2067 return spec_info; 2068 } 2069 2070 #if 0 2071 static void dump_refcounts(BlockDriverState *bs) 2072 { 2073 BDRVQcowState *s = bs->opaque; 2074 int64_t nb_clusters, k, k1, size; 2075 int refcount; 2076 2077 size = bdrv_getlength(bs->file); 2078 nb_clusters = size_to_clusters(s, size); 2079 for(k = 0; k < nb_clusters;) { 2080 k1 = k; 2081 refcount = get_refcount(bs, k); 2082 k++; 2083 while (k < nb_clusters && get_refcount(bs, k) == refcount) 2084 k++; 2085 printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount, 2086 k - k1); 2087 } 2088 } 2089 #endif 2090 2091 static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 2092 int64_t pos) 2093 { 2094 BDRVQcowState *s = bs->opaque; 2095 int64_t total_sectors = bs->total_sectors; 2096 int growable = bs->growable; 2097 bool zero_beyond_eof = bs->zero_beyond_eof; 2098 int ret; 2099 2100 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); 2101 bs->growable = 1; 2102 bs->zero_beyond_eof = false; 2103 ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov); 2104 bs->growable = growable; 2105 bs->zero_beyond_eof = zero_beyond_eof; 2106 2107 /* bdrv_co_do_writev will have increased the total_sectors value to include 2108 * the VM state - the VM state is however not an actual part of the block 2109 * device, therefore, we need to restore the old value. */ 2110 bs->total_sectors = total_sectors; 2111 2112 return ret; 2113 } 2114 2115 static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, 2116 int64_t pos, int size) 2117 { 2118 BDRVQcowState *s = bs->opaque; 2119 int growable = bs->growable; 2120 bool zero_beyond_eof = bs->zero_beyond_eof; 2121 int ret; 2122 2123 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); 2124 bs->growable = 1; 2125 bs->zero_beyond_eof = false; 2126 ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size); 2127 bs->growable = growable; 2128 bs->zero_beyond_eof = zero_beyond_eof; 2129 2130 return ret; 2131 } 2132 2133 /* 2134 * Downgrades an image's version. To achieve this, any incompatible features 2135 * have to be removed. 2136 */ 2137 static int qcow2_downgrade(BlockDriverState *bs, int target_version) 2138 { 2139 BDRVQcowState *s = bs->opaque; 2140 int current_version = s->qcow_version; 2141 int ret; 2142 2143 if (target_version == current_version) { 2144 return 0; 2145 } else if (target_version > current_version) { 2146 return -EINVAL; 2147 } else if (target_version != 2) { 2148 return -EINVAL; 2149 } 2150 2151 if (s->refcount_order != 4) { 2152 /* we would have to convert the image to a refcount_order == 4 image 2153 * here; however, since qemu (at the time of writing this) does not 2154 * support anything different than 4 anyway, there is no point in doing 2155 * so right now; however, we should error out (if qemu supports this in 2156 * the future and this code has not been adapted) */ 2157 error_report("qcow2_downgrade: Image refcount orders other than 4 are " 2158 "currently not supported."); 2159 return -ENOTSUP; 2160 } 2161 2162 /* clear incompatible features */ 2163 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 2164 ret = qcow2_mark_clean(bs); 2165 if (ret < 0) { 2166 return ret; 2167 } 2168 } 2169 2170 /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in 2171 * the first place; if that happens nonetheless, returning -ENOTSUP is the 2172 * best thing to do anyway */ 2173 2174 if (s->incompatible_features) { 2175 return -ENOTSUP; 2176 } 2177 2178 /* since we can ignore compatible features, we can set them to 0 as well */ 2179 s->compatible_features = 0; 2180 /* if lazy refcounts have been used, they have already been fixed through 2181 * clearing the dirty flag */ 2182 2183 /* clearing autoclear features is trivial */ 2184 s->autoclear_features = 0; 2185 2186 ret = qcow2_expand_zero_clusters(bs); 2187 if (ret < 0) { 2188 return ret; 2189 } 2190 2191 s->qcow_version = target_version; 2192 ret = qcow2_update_header(bs); 2193 if (ret < 0) { 2194 s->qcow_version = current_version; 2195 return ret; 2196 } 2197 return 0; 2198 } 2199 2200 static int qcow2_amend_options(BlockDriverState *bs, 2201 QEMUOptionParameter *options) 2202 { 2203 BDRVQcowState *s = bs->opaque; 2204 int old_version = s->qcow_version, new_version = old_version; 2205 uint64_t new_size = 0; 2206 const char *backing_file = NULL, *backing_format = NULL; 2207 bool lazy_refcounts = s->use_lazy_refcounts; 2208 int ret; 2209 int i; 2210 2211 for (i = 0; options[i].name; i++) 2212 { 2213 if (!options[i].assigned) { 2214 /* only change explicitly defined options */ 2215 continue; 2216 } 2217 2218 if (!strcmp(options[i].name, "compat")) { 2219 if (!options[i].value.s) { 2220 /* preserve default */ 2221 } else if (!strcmp(options[i].value.s, "0.10")) { 2222 new_version = 2; 2223 } else if (!strcmp(options[i].value.s, "1.1")) { 2224 new_version = 3; 2225 } else { 2226 fprintf(stderr, "Unknown compatibility level %s.\n", 2227 options[i].value.s); 2228 return -EINVAL; 2229 } 2230 } else if (!strcmp(options[i].name, "preallocation")) { 2231 fprintf(stderr, "Cannot change preallocation mode.\n"); 2232 return -ENOTSUP; 2233 } else if (!strcmp(options[i].name, "size")) { 2234 new_size = options[i].value.n; 2235 } else if (!strcmp(options[i].name, "backing_file")) { 2236 backing_file = options[i].value.s; 2237 } else if (!strcmp(options[i].name, "backing_fmt")) { 2238 backing_format = options[i].value.s; 2239 } else if (!strcmp(options[i].name, "encryption")) { 2240 if ((options[i].value.n != !!s->crypt_method)) { 2241 fprintf(stderr, "Changing the encryption flag is not " 2242 "supported.\n"); 2243 return -ENOTSUP; 2244 } 2245 } else if (!strcmp(options[i].name, "cluster_size")) { 2246 if (options[i].value.n != s->cluster_size) { 2247 fprintf(stderr, "Changing the cluster size is not " 2248 "supported.\n"); 2249 return -ENOTSUP; 2250 } 2251 } else if (!strcmp(options[i].name, "lazy_refcounts")) { 2252 lazy_refcounts = options[i].value.n; 2253 } else { 2254 /* if this assertion fails, this probably means a new option was 2255 * added without having it covered here */ 2256 assert(false); 2257 } 2258 } 2259 2260 if (new_version != old_version) { 2261 if (new_version > old_version) { 2262 /* Upgrade */ 2263 s->qcow_version = new_version; 2264 ret = qcow2_update_header(bs); 2265 if (ret < 0) { 2266 s->qcow_version = old_version; 2267 return ret; 2268 } 2269 } else { 2270 ret = qcow2_downgrade(bs, new_version); 2271 if (ret < 0) { 2272 return ret; 2273 } 2274 } 2275 } 2276 2277 if (backing_file || backing_format) { 2278 ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file, 2279 backing_format ?: bs->backing_format); 2280 if (ret < 0) { 2281 return ret; 2282 } 2283 } 2284 2285 if (s->use_lazy_refcounts != lazy_refcounts) { 2286 if (lazy_refcounts) { 2287 if (s->qcow_version < 3) { 2288 fprintf(stderr, "Lazy refcounts only supported with compatibility " 2289 "level 1.1 and above (use compat=1.1 or greater)\n"); 2290 return -EINVAL; 2291 } 2292 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 2293 ret = qcow2_update_header(bs); 2294 if (ret < 0) { 2295 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 2296 return ret; 2297 } 2298 s->use_lazy_refcounts = true; 2299 } else { 2300 /* make image clean first */ 2301 ret = qcow2_mark_clean(bs); 2302 if (ret < 0) { 2303 return ret; 2304 } 2305 /* now disallow lazy refcounts */ 2306 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS; 2307 ret = qcow2_update_header(bs); 2308 if (ret < 0) { 2309 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; 2310 return ret; 2311 } 2312 s->use_lazy_refcounts = false; 2313 } 2314 } 2315 2316 if (new_size) { 2317 ret = bdrv_truncate(bs, new_size); 2318 if (ret < 0) { 2319 return ret; 2320 } 2321 } 2322 2323 return 0; 2324 } 2325 2326 static QEMUOptionParameter qcow2_create_options[] = { 2327 { 2328 .name = BLOCK_OPT_SIZE, 2329 .type = OPT_SIZE, 2330 .help = "Virtual disk size" 2331 }, 2332 { 2333 .name = BLOCK_OPT_COMPAT_LEVEL, 2334 .type = OPT_STRING, 2335 .help = "Compatibility level (0.10 or 1.1)" 2336 }, 2337 { 2338 .name = BLOCK_OPT_BACKING_FILE, 2339 .type = OPT_STRING, 2340 .help = "File name of a base image" 2341 }, 2342 { 2343 .name = BLOCK_OPT_BACKING_FMT, 2344 .type = OPT_STRING, 2345 .help = "Image format of the base image" 2346 }, 2347 { 2348 .name = BLOCK_OPT_ENCRYPT, 2349 .type = OPT_FLAG, 2350 .help = "Encrypt the image" 2351 }, 2352 { 2353 .name = BLOCK_OPT_CLUSTER_SIZE, 2354 .type = OPT_SIZE, 2355 .help = "qcow2 cluster size", 2356 .value = { .n = DEFAULT_CLUSTER_SIZE }, 2357 }, 2358 { 2359 .name = BLOCK_OPT_PREALLOC, 2360 .type = OPT_STRING, 2361 .help = "Preallocation mode (allowed values: off, metadata)" 2362 }, 2363 { 2364 .name = BLOCK_OPT_LAZY_REFCOUNTS, 2365 .type = OPT_FLAG, 2366 .help = "Postpone refcount updates", 2367 }, 2368 { NULL } 2369 }; 2370 2371 static BlockDriver bdrv_qcow2 = { 2372 .format_name = "qcow2", 2373 .instance_size = sizeof(BDRVQcowState), 2374 .bdrv_probe = qcow2_probe, 2375 .bdrv_open = qcow2_open, 2376 .bdrv_close = qcow2_close, 2377 .bdrv_reopen_prepare = qcow2_reopen_prepare, 2378 .bdrv_create = qcow2_create, 2379 .bdrv_has_zero_init = bdrv_has_zero_init_1, 2380 .bdrv_co_get_block_status = qcow2_co_get_block_status, 2381 .bdrv_set_key = qcow2_set_key, 2382 2383 .bdrv_co_readv = qcow2_co_readv, 2384 .bdrv_co_writev = qcow2_co_writev, 2385 .bdrv_co_flush_to_os = qcow2_co_flush_to_os, 2386 2387 .bdrv_co_write_zeroes = qcow2_co_write_zeroes, 2388 .bdrv_co_discard = qcow2_co_discard, 2389 .bdrv_truncate = qcow2_truncate, 2390 .bdrv_write_compressed = qcow2_write_compressed, 2391 2392 .bdrv_snapshot_create = qcow2_snapshot_create, 2393 .bdrv_snapshot_goto = qcow2_snapshot_goto, 2394 .bdrv_snapshot_delete = qcow2_snapshot_delete, 2395 .bdrv_snapshot_list = qcow2_snapshot_list, 2396 .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, 2397 .bdrv_get_info = qcow2_get_info, 2398 .bdrv_get_specific_info = qcow2_get_specific_info, 2399 2400 .bdrv_save_vmstate = qcow2_save_vmstate, 2401 .bdrv_load_vmstate = qcow2_load_vmstate, 2402 2403 .bdrv_change_backing_file = qcow2_change_backing_file, 2404 2405 .bdrv_refresh_limits = qcow2_refresh_limits, 2406 .bdrv_invalidate_cache = qcow2_invalidate_cache, 2407 2408 .create_options = qcow2_create_options, 2409 .bdrv_check = qcow2_check, 2410 .bdrv_amend_options = qcow2_amend_options, 2411 }; 2412 2413 static void bdrv_qcow2_init(void) 2414 { 2415 bdrv_register(&bdrv_qcow2); 2416 } 2417 2418 block_init(bdrv_qcow2_init); 2419