1 /* 2 * Block driver for the QCOW version 2 format 3 * 4 * Copyright (c) 2004-2006 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include "qemu-common.h" 25 #include "block/block_int.h" 26 #include "qemu/module.h" 27 #include <zlib.h> 28 #include "qemu/aes.h" 29 #include "block/qcow2.h" 30 #include "qemu/error-report.h" 31 #include "qapi/qmp/qerror.h" 32 #include "qapi/qmp/qbool.h" 33 #include "trace.h" 34 35 /* 36 Differences with QCOW: 37 38 - Support for multiple incremental snapshots. 39 - Memory management by reference counts. 40 - Clusters which have a reference count of one have the bit 41 QCOW_OFLAG_COPIED to optimize write performance. 42 - Size of compressed clusters is stored in sectors to reduce bit usage 43 in the cluster offsets. 44 - Support for storing additional data (such as the VM state) in the 45 snapshots. 
46 - If a backing store is used, the cluster size is not constrained 47 (could be backported to QCOW). 48 - L2 tables have always a size of one cluster. 49 */ 50 51 52 typedef struct { 53 uint32_t magic; 54 uint32_t len; 55 } QCowExtension; 56 57 #define QCOW2_EXT_MAGIC_END 0 58 #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA 59 #define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 60 61 static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) 62 { 63 const QCowHeader *cow_header = (const void *)buf; 64 65 if (buf_size >= sizeof(QCowHeader) && 66 be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 67 be32_to_cpu(cow_header->version) >= 2) 68 return 100; 69 else 70 return 0; 71 } 72 73 74 /* 75 * read qcow2 extension and fill bs 76 * start reading from start_offset 77 * finish reading upon magic of value 0 or when end_offset reached 78 * unknown magic is skipped (future extension this version knows nothing about) 79 * return 0 upon success, non-0 otherwise 80 */ 81 static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, 82 uint64_t end_offset, void **p_feature_table) 83 { 84 BDRVQcowState *s = bs->opaque; 85 QCowExtension ext; 86 uint64_t offset; 87 int ret; 88 89 #ifdef DEBUG_EXT 90 printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); 91 #endif 92 offset = start_offset; 93 while (offset < end_offset) { 94 95 #ifdef DEBUG_EXT 96 /* Sanity check */ 97 if (offset > s->cluster_size) 98 printf("qcow2_read_extension: suspicious offset %lu\n", offset); 99 100 printf("attempting to read extended header in offset %lu\n", offset); 101 #endif 102 103 if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) { 104 fprintf(stderr, "qcow2_read_extension: ERROR: " 105 "pread fail from offset %" PRIu64 "\n", 106 offset); 107 return 1; 108 } 109 be32_to_cpus(&ext.magic); 110 be32_to_cpus(&ext.len); 111 offset += sizeof(ext); 112 #ifdef DEBUG_EXT 113 printf("ext.magic = 0x%x\n", ext.magic); 114 #endif 115 if 
(ext.len > end_offset - offset) { 116 error_report("Header extension too large"); 117 return -EINVAL; 118 } 119 120 switch (ext.magic) { 121 case QCOW2_EXT_MAGIC_END: 122 return 0; 123 124 case QCOW2_EXT_MAGIC_BACKING_FORMAT: 125 if (ext.len >= sizeof(bs->backing_format)) { 126 fprintf(stderr, "ERROR: ext_backing_format: len=%u too large" 127 " (>=%zu)\n", 128 ext.len, sizeof(bs->backing_format)); 129 return 2; 130 } 131 if (bdrv_pread(bs->file, offset , bs->backing_format, 132 ext.len) != ext.len) 133 return 3; 134 bs->backing_format[ext.len] = '\0'; 135 #ifdef DEBUG_EXT 136 printf("Qcow2: Got format extension %s\n", bs->backing_format); 137 #endif 138 break; 139 140 case QCOW2_EXT_MAGIC_FEATURE_TABLE: 141 if (p_feature_table != NULL) { 142 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); 143 ret = bdrv_pread(bs->file, offset , feature_table, ext.len); 144 if (ret < 0) { 145 return ret; 146 } 147 148 *p_feature_table = feature_table; 149 } 150 break; 151 152 default: 153 /* unknown magic - save it in case we need to rewrite the header */ 154 { 155 Qcow2UnknownHeaderExtension *uext; 156 157 uext = g_malloc0(sizeof(*uext) + ext.len); 158 uext->magic = ext.magic; 159 uext->len = ext.len; 160 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); 161 162 ret = bdrv_pread(bs->file, offset , uext->data, uext->len); 163 if (ret < 0) { 164 return ret; 165 } 166 } 167 break; 168 } 169 170 offset += ((ext.len + 7) & ~7); 171 } 172 173 return 0; 174 } 175 176 static void cleanup_unknown_header_ext(BlockDriverState *bs) 177 { 178 BDRVQcowState *s = bs->opaque; 179 Qcow2UnknownHeaderExtension *uext, *next; 180 181 QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { 182 QLIST_REMOVE(uext, next); 183 g_free(uext); 184 } 185 } 186 187 static void GCC_FMT_ATTR(2, 3) report_unsupported(BlockDriverState *bs, 188 const char *fmt, ...) 
189 { 190 char msg[64]; 191 va_list ap; 192 193 va_start(ap, fmt); 194 vsnprintf(msg, sizeof(msg), fmt, ap); 195 va_end(ap); 196 197 qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, 198 bs->device_name, "qcow2", msg); 199 } 200 201 static void report_unsupported_feature(BlockDriverState *bs, 202 Qcow2Feature *table, uint64_t mask) 203 { 204 while (table && table->name[0] != '\0') { 205 if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) { 206 if (mask & (1 << table->bit)) { 207 report_unsupported(bs, "%.46s",table->name); 208 mask &= ~(1 << table->bit); 209 } 210 } 211 table++; 212 } 213 214 if (mask) { 215 report_unsupported(bs, "Unknown incompatible feature: %" PRIx64, mask); 216 } 217 } 218 219 /* 220 * Sets the dirty bit and flushes afterwards if necessary. 221 * 222 * The incompatible_features bit is only set if the image file header was 223 * updated successfully. Therefore it is not required to check the return 224 * value of this function. 225 */ 226 int qcow2_mark_dirty(BlockDriverState *bs) 227 { 228 BDRVQcowState *s = bs->opaque; 229 uint64_t val; 230 int ret; 231 232 assert(s->qcow_version >= 3); 233 234 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { 235 return 0; /* already dirty */ 236 } 237 238 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY); 239 ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features), 240 &val, sizeof(val)); 241 if (ret < 0) { 242 return ret; 243 } 244 ret = bdrv_flush(bs->file); 245 if (ret < 0) { 246 return ret; 247 } 248 249 /* Only treat image as dirty if the header was updated successfully */ 250 s->incompatible_features |= QCOW2_INCOMPAT_DIRTY; 251 return 0; 252 } 253 254 /* 255 * Clears the dirty bit and flushes before if necessary. Only call this 256 * function when there are no pending requests, it does not guard against 257 * concurrent requests dirtying the image. 
 */
static int qcow2_mark_clean(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;

    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
        /* Flush first so the image really is consistent by the time the
         * header stops saying "dirty" */
        int ret = bdrv_flush(bs);
        if (ret < 0) {
            return ret;
        }

        s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
        return qcow2_update_header(bs);
    }
    return 0;
}

/*
 * Marks the image as corrupt.
 */
int qcow2_mark_corrupt(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;

    s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
    return qcow2_update_header(bs);
}

/*
 * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
 * before if necessary.
 */
int qcow2_mark_consistent(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;

    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
        int ret = bdrv_flush(bs);
        if (ret < 0) {
            return ret;
        }

        s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
        return qcow2_update_header(bs);
    }
    return 0;
}

/*
 * Consistency-check callback: run the refcount check and, when a repair was
 * requested and no errors or corruptions remain, clear the dirty bit.
 */
static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
                       BdrvCheckMode fix)
{
    int ret = qcow2_check_refcounts(bs, result, fix);
    if (ret < 0) {
        return ret;
    }

    if (fix && result->check_errors == 0 && result->corruptions == 0) {
        return qcow2_mark_clean(bs);
    }
    return ret;
}

/* Runtime (open-time) options accepted by the qcow2 driver */
static QemuOptsList qcow2_runtime_opts = {
    .name = "qcow2",
    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
    .desc = {
        {
            .name = QCOW2_OPT_LAZY_REFCOUNTS,
            .type = QEMU_OPT_BOOL,
            .help = "Postpone refcount updates",
        },
        {
            .name = QCOW2_OPT_DISCARD_REQUEST,
            .type = QEMU_OPT_BOOL,
            .help = "Pass guest discard requests to the layer below",
        },
        {
            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when snapshot related space "
                    "is freed",
        },
        {
            .name = QCOW2_OPT_DISCARD_OTHER,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when other clusters are freed",
        },
        { /* end of list */ }
    },
};

/*
 * Open a qcow2 image: read and validate the header, load the L1 table,
 * set up the L2/refcount caches, read header extensions, the backing file
 * name and snapshots, and apply runtime options. On any failure everything
 * allocated so far is torn down under the "fail" label and a negative errno
 * is returned.
 */
static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
{
    BDRVQcowState *s = bs->opaque;
    int len, i, ret = 0;
    QCowHeader header;
    QemuOpts *opts;
    Error *local_err = NULL;
    uint64_t ext_end;
    uint64_t l1_vm_state_index;

    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
    if (ret < 0) {
        goto fail;
    }
    /* Header fields are stored big-endian on disk */
    be32_to_cpus(&header.magic);
    be32_to_cpus(&header.version);
    be64_to_cpus(&header.backing_file_offset);
    be32_to_cpus(&header.backing_file_size);
    be64_to_cpus(&header.size);
    be32_to_cpus(&header.cluster_bits);
    be32_to_cpus(&header.crypt_method);
    be64_to_cpus(&header.l1_table_offset);
    be32_to_cpus(&header.l1_size);
    be64_to_cpus(&header.refcount_table_offset);
    be32_to_cpus(&header.refcount_table_clusters);
    be64_to_cpus(&header.snapshots_offset);
    be32_to_cpus(&header.nb_snapshots);

    if (header.magic != QCOW_MAGIC) {
        ret = -EMEDIUMTYPE;
        goto fail;
    }
    if (header.version < 2 || header.version > 3) {
        report_unsupported(bs, "QCOW version %d", header.version);
        ret = -ENOTSUP;
        goto fail;
    }

    s->qcow_version = header.version;

    /* Initialise version 3 header fields */
    if (header.version == 2) {
        /* v2 images have a fixed 72-byte header, 16-bit refcounts and no
         * feature bits; synthesise the v3 values so later code need not
         * special-case the version */
        header.incompatible_features    = 0;
        header.compatible_features      = 0;
        header.autoclear_features       = 0;
        header.refcount_order           = 4;
        header.header_length            = 72;
    } else {
        be64_to_cpus(&header.incompatible_features);
        be64_to_cpus(&header.compatible_features);
        be64_to_cpus(&header.autoclear_features);
        be32_to_cpus(&header.refcount_order);
        be32_to_cpus(&header.header_length);
    }

    /* Preserve any header bytes beyond what this binary understands, so a
     * later header rewrite does not destroy them */
    if (header.header_length > sizeof(header)) {
        s->unknown_header_fields_size = header.header_length - sizeof(header);
        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
                         s->unknown_header_fields_size);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Header extensions end where the backing file name starts, or at the
     * end of the first cluster if there is no backing file */
    if (header.backing_file_offset) {
        ext_end = header.backing_file_offset;
    } else {
        ext_end = 1 << header.cluster_bits;
    }

    /* Handle feature bits */
    s->incompatible_features    = header.incompatible_features;
    s->compatible_features      = header.compatible_features;
    s->autoclear_features       = header.autoclear_features;

    if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
        /* Unknown incompatible bits set: try to name them via the feature
         * table extension for a better error message, then refuse to open */
        void *feature_table = NULL;
        qcow2_read_extensions(bs, header.header_length, ext_end,
                              &feature_table);
        report_unsupported_feature(bs, feature_table,
                                   s->incompatible_features &
                                   ~QCOW2_INCOMPAT_MASK);
        ret = -ENOTSUP;
        goto fail;
    }

    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
        /* Corrupt images may not be written to unless they are being repaired
         */
        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
            error_report("qcow2: Image is corrupt; cannot be opened "
                    "read/write.");
            ret = -EACCES;
            goto fail;
        }
    }

    /* Check support for various header values */
    if (header.refcount_order != 4) {
        /* Only 16-bit refcounts are implemented here */
        report_unsupported(bs, "%d bit reference counts",
                           1 << header.refcount_order);
        ret = -ENOTSUP;
        goto fail;
    }

    if (header.cluster_bits < MIN_CLUSTER_BITS ||
        header.cluster_bits > MAX_CLUSTER_BITS) {
        ret = -EINVAL;
        goto fail;
    }
    if (header.crypt_method > QCOW_CRYPT_AES) {
        ret = -EINVAL;
        goto fail;
    }
    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
        bs->encrypted = 1;
    }
    s->cluster_bits = header.cluster_bits;
    s->cluster_size = 1 << s->cluster_bits;
    s->cluster_sectors = 1 << (s->cluster_bits - 9);
    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
    s->l2_size = 1 << s->l2_bits;
    bs->total_sectors = header.size / 512;
    s->csize_shift = (62 - (s->cluster_bits - 8));
    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
    s->refcount_table_offset = header.refcount_table_offset;
    s->refcount_table_size =
        header.refcount_table_clusters << (s->cluster_bits - 3);

    s->snapshots_offset = header.snapshots_offset;
    s->nb_snapshots = header.nb_snapshots;

    /* read the level 1 table */
    s->l1_size = header.l1_size;

    l1_vm_state_index = size_to_l1(s, header.size);
    if (l1_vm_state_index > INT_MAX) {
        ret = -EFBIG;
        goto fail;
    }
    s->l1_vm_state_index = l1_vm_state_index;

    /* the L1 table must contain at least enough entries to put
       header.size bytes */
    if (s->l1_size < s->l1_vm_state_index) {
        ret = -EINVAL;
        goto fail;
    }
    s->l1_table_offset = header.l1_table_offset;
    if (s->l1_size > 0) {
        /* NOTE(review): header.l1_size has no upper bound here beyond being
         * >= l1_vm_state_index, so the size computation below can in theory
         * overflow for a hostile image — TODO confirm against the format
         * spec's limits / later hardening */
        s->l1_table = g_malloc0(
            align_offset(s->l1_size * sizeof(uint64_t), 512));
        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
                         s->l1_size * sizeof(uint64_t));
        if (ret < 0) {
            goto fail;
        }
        for(i = 0;i < s->l1_size; i++) {
            be64_to_cpus(&s->l1_table[i]);
        }
    }

    /* alloc L2 table/refcount block cache */
    s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
    s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);

    s->cluster_cache = g_malloc(s->cluster_size);
    /* one more sector for decompressed data alignment */
    s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
                                      + 512);
    s->cluster_cache_offset = -1;
    s->flags = flags;

    ret = qcow2_refcount_init(bs);
    if (ret != 0) {
        goto fail;
    }

    QLIST_INIT(&s->cluster_allocs);
    QTAILQ_INIT(&s->discards);

    /* read qcow2 extensions */
    if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
        ret = -EINVAL;
        goto fail;
    }

    /* read the backing file name */
    if (header.backing_file_offset != 0) {
        len = header.backing_file_size;
        /* Silently truncate over-long backing file names to the buffer */
        if (len > 1023) {
            len = 1023;
        }
        ret = bdrv_pread(bs->file, header.backing_file_offset,
                         bs->backing_file, len);
        if (ret < 0) {
            goto fail;
        }
        bs->backing_file[len] = '\0';
    }

    ret = qcow2_read_snapshots(bs);
    if (ret < 0) {
        goto fail;
    }

    /* Clear unknown autoclear feature bits */
    if (!bs->read_only && s->autoclear_features != 0) {
        s->autoclear_features = 0;
        ret = qcow2_update_header(bs);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Initialise locks */
    qemu_co_mutex_init(&s->lock);

    /* Repair image if dirty */
    if (!(flags & BDRV_O_CHECK) && !bs->read_only &&
        (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
        BdrvCheckResult result = {0};

        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Enable lazy_refcounts according to image and command line options */
    opts = qemu_opts_create_nofail(&qcow2_runtime_opts);
    qemu_opts_absorb_qdict(opts, options, &local_err);
    if (error_is_set(&local_err)) {
        qerror_report_err(local_err);
        error_free(local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* Command-line option overrides the image's compatible-feature bit */
    s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));

    s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
    s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
    s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
                          flags & BDRV_O_UNMAP);
    s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
    s->discard_passthrough[QCOW2_DISCARD_OTHER] =
        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);

    qemu_opts_del(opts);

    /* Lazy refcounts rely on the v3 dirty bit, so v2 images cannot use it */
    if (s->use_lazy_refcounts && s->qcow_version < 3) {
        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Lazy refcounts require "
            "a qcow2 image with at least qemu 1.1 compatibility level");
        ret = -EINVAL;
        goto fail;
    }

#ifdef DEBUG_ALLOC
    {
        BdrvCheckResult result = {0};
        qcow2_check_refcounts(bs, &result, 0);
    }
#endif
    return ret;

 fail:
    g_free(s->unknown_header_fields);
    cleanup_unknown_header_ext(bs);
    qcow2_free_snapshots(bs);
    qcow2_refcount_close(bs);
    g_free(s->l1_table);
    if (s->l2_table_cache) {
        qcow2_cache_destroy(bs, s->l2_table_cache);
    }
    g_free(s->cluster_cache);
    qemu_vfree(s->cluster_data);
    return ret;
}

/*
 * Set the AES key used for encrypted images. The key string is truncated
 * or zero-padded to 16 bytes. Returns 0 on success, -1 on failure.
 */
static int qcow2_set_key(BlockDriverState *bs, const char *key)
{
    BDRVQcowState *s = bs->opaque;
    uint8_t keybuf[16];
    int len, i;

    memset(keybuf, 0, 16);
    len = strlen(key);
    if (len > 16)
        len = 16;
    /* XXX: we could compress the chars to 7 bits to increase
       entropy */
    for(i = 0;i < len;i++) {
        keybuf[i] = key[i];
    }
    s->crypt_method = s->crypt_method_header;

    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
        return -1;
    if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
        return -1;
#if 0
    /* test */
    {
        uint8_t in[16];
        uint8_t out[16];
        uint8_t tmp[16];
        for(i=0;i<16;i++)
            in[i] = i;
        AES_encrypt(in, tmp, &s->aes_encrypt_key);
        AES_decrypt(tmp, out, &s->aes_decrypt_key);
        for(i = 0; i < 16; i++)
            printf(" %02x", tmp[i]);
        printf("\n");
        for(i = 0; i < 16; i++)
            printf(" %02x", out[i]);
        printf("\n");
    }
#endif
    return 0;
}

/* We have nothing to do for QCOW2 reopen, stubs just return
 * success */
static int qcow2_reopen_prepare(BDRVReopenState *state,
                                BlockReopenQueue *queue, Error **errp)
{
    return 0;
}

/*
 * Coroutine callback: report whether the sectors starting at sector_num are
 * allocated in this layer. *pnum is set to the number of contiguous sectors
 * sharing the answer. A zero cluster counts as allocated.
 */
static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, int *pnum)
{
    BDRVQcowState *s = bs->opaque;
    uint64_t cluster_offset;
    int ret;

    *pnum = nb_sectors;
    /* FIXME We can get errors here, but the bdrv_co_is_allocated interface
     * can't pass them on today */
    qemu_co_mutex_lock(&s->lock);
    ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
    qemu_co_mutex_unlock(&s->lock);
    if (ret < 0) {
        *pnum = 0;
    }

    return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
}

/* handle reading after the end of the backing file */
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
                  int64_t sector_num, int nb_sectors)
{
    int n1;
    /* Entire request within the backing file: read all of it from there */
    if ((sector_num + nb_sectors) <= bs->total_sectors)
        return nb_sectors;
    if (sector_num >= bs->total_sectors)
        n1 = 0;
    else
        n1 = bs->total_sectors - sector_num;

    /* Zero-fill the part of the request that lies beyond the backing file;
     * only the first n1 sectors still need an actual read */
    qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));

    return n1;
}

/*
 * Coroutine read path. Iterates cluster-run by cluster-run, dispatching on
 * the cluster type: unallocated (backing file or zeros), explicit zero,
 * compressed (decompress via the cluster cache), or normal (direct read,
 * with an AES-decrypt bounce buffer for encrypted images). s->lock is
 * dropped around the actual I/O.
 */
static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                          int remaining_sectors, QEMUIOVector *qiov)
{
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster, n1;
    int ret;
    int cur_nr_sectors; /* number of sectors in current iteration */
    uint64_t cluster_offset = 0;
    uint64_t bytes_done = 0;
    QEMUIOVector hd_qiov;
    uint8_t *cluster_data = NULL;

    qemu_iovec_init(&hd_qiov, qiov->niov);

    qemu_co_mutex_lock(&s->lock);

    while (remaining_sectors != 0) {

        /* prepare next request */
        cur_nr_sectors = remaining_sectors;
        if (s->crypt_method) {
            /* Encrypted reads go through a bounded bounce buffer */
            cur_nr_sectors = MIN(cur_nr_sectors,
                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
        }

        ret = qcow2_get_cluster_offset(bs, sector_num << 9,
            &cur_nr_sectors, &cluster_offset);
        if (ret < 0) {
            goto fail;
        }

        index_in_cluster = sector_num & (s->cluster_sectors - 1);

        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
            cur_nr_sectors * 512);

        switch (ret) {
        case QCOW2_CLUSTER_UNALLOCATED:

            if (bs->backing_hd) {
                /* read from the base image */
                n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
                    sector_num, cur_nr_sectors);
                if (n1 > 0) {
                    BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                    qemu_co_mutex_unlock(&s->lock);
                    ret = bdrv_co_readv(bs->backing_hd, sector_num,
                                        n1, &hd_qiov);
                    qemu_co_mutex_lock(&s->lock);
                    if (ret < 0) {
                        goto fail;
                    }
                }
            } else {
                /* Note: in this case, no need to wait */
                qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
            }
            break;

        case QCOW2_CLUSTER_ZERO:
            qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
            break;

        case QCOW2_CLUSTER_COMPRESSED:
            /* add AIO support for compressed blocks ? */
            ret = qcow2_decompress_cluster(bs, cluster_offset);
            if (ret < 0) {
                goto fail;
            }

            qemu_iovec_from_buf(&hd_qiov, 0,
                s->cluster_cache + index_in_cluster * 512,
                512 * cur_nr_sectors);
            break;

        case QCOW2_CLUSTER_NORMAL:
            if ((cluster_offset & 511) != 0) {
                ret = -EIO;
                goto fail;
            }

            if (s->crypt_method) {
                /*
                 * For encrypted images, read everything into a temporary
                 * contiguous buffer on which the AES functions can work.
                 */
                if (!cluster_data) {
                    cluster_data =
                        qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
                }

                assert(cur_nr_sectors <=
                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
                qemu_iovec_reset(&hd_qiov);
                qemu_iovec_add(&hd_qiov, cluster_data,
                    512 * cur_nr_sectors);
            }

            BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
            qemu_co_mutex_unlock(&s->lock);
            ret = bdrv_co_readv(bs->file,
                                (cluster_offset >> 9) + index_in_cluster,
                                cur_nr_sectors, &hd_qiov);
            qemu_co_mutex_lock(&s->lock);
            if (ret < 0) {
                goto fail;
            }
            if (s->crypt_method) {
                /* Decrypt in place in the bounce buffer, then copy out into
                 * the caller's scatter/gather list */
                qcow2_encrypt_sectors(s, sector_num,  cluster_data,
                    cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key);
                qemu_iovec_from_buf(qiov, bytes_done,
                    cluster_data, 512 * cur_nr_sectors);
            }
            break;

        default:
            g_assert_not_reached();
            ret = -EIO;
            goto fail;
        }

        remaining_sectors -= cur_nr_sectors;
        sector_num += cur_nr_sectors;
        bytes_done += cur_nr_sectors * 512;
    }
    ret = 0;

fail:
    qemu_co_mutex_unlock(&s->lock);

    qemu_iovec_destroy(&hd_qiov);
    qemu_vfree(cluster_data);

    return ret;
}

/*
 * Coroutine write path. Allocates clusters as needed, optionally encrypts
 * into a bounce buffer, writes the data with s->lock dropped, then updates
 * the L2 tables for every allocation (l2meta chain). On both success and
 * failure every pending l2meta is taken off the in-flight list and its
 * dependent requests are woken.
 */
static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                           int64_t sector_num,
                           int remaining_sectors,
                           QEMUIOVector *qiov)
{
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster;
    int n_end;
    int ret;
    int cur_nr_sectors; /* number of sectors in current iteration */
    uint64_t cluster_offset;
    QEMUIOVector hd_qiov;
    uint64_t bytes_done = 0;
    uint8_t *cluster_data = NULL;
    QCowL2Meta *l2meta = NULL;

    trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
                                 remaining_sectors);

    qemu_iovec_init(&hd_qiov, qiov->niov);

    s->cluster_cache_offset = -1; /* disable compressed cache */

    qemu_co_mutex_lock(&s->lock);

    while (remaining_sectors != 0) {

        l2meta = NULL;

        trace_qcow2_writev_start_part(qemu_coroutine_self());
        index_in_cluster = sector_num & (s->cluster_sectors - 1);
        n_end = index_in_cluster + remaining_sectors;
        if (s->crypt_method &&
            n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) {
            n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
        }

        ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
            index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
        if (ret < 0) {
            goto fail;
        }

        assert((cluster_offset & 511) == 0);

        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
            cur_nr_sectors * 512);

        if (s->crypt_method) {
            /* Gather the plaintext into a contiguous bounce buffer and
             * encrypt it there before writing */
            if (!cluster_data) {
                cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
                                                   s->cluster_size);
            }

            assert(hd_qiov.size <=
                   QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
            qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);

            qcow2_encrypt_sectors(s, sector_num, cluster_data,
                cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key);

            qemu_iovec_reset(&hd_qiov);
            qemu_iovec_add(&hd_qiov, cluster_data,
                cur_nr_sectors * 512);
        }

        qemu_co_mutex_unlock(&s->lock);
        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
        trace_qcow2_writev_data(qemu_coroutine_self(),
                                (cluster_offset >> 9) + index_in_cluster);
        ret = bdrv_co_writev(bs->file,
                             (cluster_offset >> 9) + index_in_cluster,
                             cur_nr_sectors, &hd_qiov);
        qemu_co_mutex_lock(&s->lock);
        if (ret < 0) {
            goto fail;
        }

        while (l2meta != NULL) {
            QCowL2Meta *next;

            /* Link the freshly written clusters into the L2 table */
            ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
            if (ret < 0) {
                goto fail;
            }

            /* Take the request off the list of running requests */
            if (l2meta->nb_clusters != 0) {
                QLIST_REMOVE(l2meta, next_in_flight);
            }

            qemu_co_queue_restart_all(&l2meta->dependent_requests);

            next = l2meta->next;
            g_free(l2meta);
            l2meta = next;
        }

        remaining_sectors -= cur_nr_sectors;
        sector_num += cur_nr_sectors;
        bytes_done += cur_nr_sectors * 512;
        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
    }
    ret = 0;

fail:
    qemu_co_mutex_unlock(&s->lock);

    /* On the error path l2meta may still hold a partially processed chain;
     * unhook and wake everything so no request stays blocked forever */
    while (l2meta != NULL) {
        QCowL2Meta *next;

        if (l2meta->nb_clusters != 0) {
            QLIST_REMOVE(l2meta, next_in_flight);
        }
        qemu_co_queue_restart_all(&l2meta->dependent_requests);

        next = l2meta->next;
        g_free(l2meta);
        l2meta = next;
    }

    qemu_iovec_destroy(&hd_qiov);
    qemu_vfree(cluster_data);
    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);

    return ret;
}

/*
 * Tear down all in-memory state: flush the metadata caches, clear the dirty
 * bit (image is consistent after the flushes), and free everything that
 * qcow2_open() allocated.
 */
static void qcow2_close(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    g_free(s->l1_table);

    qcow2_cache_flush(bs, s->l2_table_cache);
    qcow2_cache_flush(bs, s->refcount_block_cache);

    qcow2_mark_clean(bs);

    qcow2_cache_destroy(bs, s->l2_table_cache);
    qcow2_cache_destroy(bs, s->refcount_block_cache);

    g_free(s->unknown_header_fields);
    cleanup_unknown_header_ext(bs);

    g_free(s->cluster_cache);
    qemu_vfree(s->cluster_data);
    qcow2_refcount_close(bs);
    qcow2_free_snapshots(bs);
}

/*
 * Drop all cached metadata by closing and reopening the image in place.
 * The AES keys are not recoverable from the reopened state (set_key is an
 * external call), so they are saved across the close/open cycle.
 */
static void qcow2_invalidate_cache(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    int flags = s->flags;
    AES_KEY aes_encrypt_key;
    AES_KEY aes_decrypt_key;
    uint32_t crypt_method = 0;
    QDict *options;

    /*
     * Backing files are read-only which makes all of their metadata immutable,
     * that means we don't have to worry about reopening them here.
     */

    if (s->crypt_method) {
        crypt_method = s->crypt_method;
        memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key));
        memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key));
    }

    qcow2_close(bs);

    /* Re-apply the lazy-refcounts setting chosen at original open time */
    options = qdict_new();
    qdict_put(options, QCOW2_OPT_LAZY_REFCOUNTS,
              qbool_from_int(s->use_lazy_refcounts));

    memset(s, 0, sizeof(BDRVQcowState));
    qcow2_open(bs, options, flags);

    QDECREF(options);

    if (crypt_method) {
        s->crypt_method = crypt_method;
        memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key));
        memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key));
    }
}

/*
 * Append one header extension (magic + length + payload, padded to 8 bytes)
 * at buf. Returns the number of bytes written, or -ENOSPC if it does not
 * fit in buflen.
 *
 * NOTE(review): the return type is size_t, so -ENOSPC is returned through
 * an unsigned type; callers assign the result to a signed int, which
 * restores the negative value on common ABIs — fragile, verify before
 * changing either side.
 */
static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
    size_t len, size_t buflen)
{
    QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
    size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);

    if (buflen < ext_len) {
        return -ENOSPC;
    }

    *ext_backing_fmt = (QCowExtension) {
        .magic  = cpu_to_be32(magic),
        .len    = cpu_to_be32(len),
    };
    memcpy(buf + sizeof(QCowExtension), s, len);

    return ext_len;
}

/*
 * Updates the qcow2 header, including the variable length parts of it, i.e.
 * the backing file name and all extensions. qcow2 was not designed to allow
 * such changes, so if we run out of space (we can only use the first cluster)
 * this function may fail.
 *
 * Returns 0 on success, -errno in error cases.
1074 */ 1075 int qcow2_update_header(BlockDriverState *bs) 1076 { 1077 BDRVQcowState *s = bs->opaque; 1078 QCowHeader *header; 1079 char *buf; 1080 size_t buflen = s->cluster_size; 1081 int ret; 1082 uint64_t total_size; 1083 uint32_t refcount_table_clusters; 1084 size_t header_length; 1085 Qcow2UnknownHeaderExtension *uext; 1086 1087 buf = qemu_blockalign(bs, buflen); 1088 1089 /* Header structure */ 1090 header = (QCowHeader*) buf; 1091 1092 if (buflen < sizeof(*header)) { 1093 ret = -ENOSPC; 1094 goto fail; 1095 } 1096 1097 header_length = sizeof(*header) + s->unknown_header_fields_size; 1098 total_size = bs->total_sectors * BDRV_SECTOR_SIZE; 1099 refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); 1100 1101 *header = (QCowHeader) { 1102 /* Version 2 fields */ 1103 .magic = cpu_to_be32(QCOW_MAGIC), 1104 .version = cpu_to_be32(s->qcow_version), 1105 .backing_file_offset = 0, 1106 .backing_file_size = 0, 1107 .cluster_bits = cpu_to_be32(s->cluster_bits), 1108 .size = cpu_to_be64(total_size), 1109 .crypt_method = cpu_to_be32(s->crypt_method_header), 1110 .l1_size = cpu_to_be32(s->l1_size), 1111 .l1_table_offset = cpu_to_be64(s->l1_table_offset), 1112 .refcount_table_offset = cpu_to_be64(s->refcount_table_offset), 1113 .refcount_table_clusters = cpu_to_be32(refcount_table_clusters), 1114 .nb_snapshots = cpu_to_be32(s->nb_snapshots), 1115 .snapshots_offset = cpu_to_be64(s->snapshots_offset), 1116 1117 /* Version 3 fields */ 1118 .incompatible_features = cpu_to_be64(s->incompatible_features), 1119 .compatible_features = cpu_to_be64(s->compatible_features), 1120 .autoclear_features = cpu_to_be64(s->autoclear_features), 1121 .refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT), 1122 .header_length = cpu_to_be32(header_length), 1123 }; 1124 1125 /* For older versions, write a shorter header */ 1126 switch (s->qcow_version) { 1127 case 2: 1128 ret = offsetof(QCowHeader, incompatible_features); 1129 break; 1130 case 3: 1131 ret = sizeof(*header); 1132 
break; 1133 default: 1134 ret = -EINVAL; 1135 goto fail; 1136 } 1137 1138 buf += ret; 1139 buflen -= ret; 1140 memset(buf, 0, buflen); 1141 1142 /* Preserve any unknown field in the header */ 1143 if (s->unknown_header_fields_size) { 1144 if (buflen < s->unknown_header_fields_size) { 1145 ret = -ENOSPC; 1146 goto fail; 1147 } 1148 1149 memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size); 1150 buf += s->unknown_header_fields_size; 1151 buflen -= s->unknown_header_fields_size; 1152 } 1153 1154 /* Backing file format header extension */ 1155 if (*bs->backing_format) { 1156 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT, 1157 bs->backing_format, strlen(bs->backing_format), 1158 buflen); 1159 if (ret < 0) { 1160 goto fail; 1161 } 1162 1163 buf += ret; 1164 buflen -= ret; 1165 } 1166 1167 /* Feature table */ 1168 Qcow2Feature features[] = { 1169 { 1170 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 1171 .bit = QCOW2_INCOMPAT_DIRTY_BITNR, 1172 .name = "dirty bit", 1173 }, 1174 { 1175 .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, 1176 .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, 1177 .name = "corrupt bit", 1178 }, 1179 { 1180 .type = QCOW2_FEAT_TYPE_COMPATIBLE, 1181 .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, 1182 .name = "lazy refcounts", 1183 }, 1184 }; 1185 1186 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, 1187 features, sizeof(features), buflen); 1188 if (ret < 0) { 1189 goto fail; 1190 } 1191 buf += ret; 1192 buflen -= ret; 1193 1194 /* Keep unknown header extensions */ 1195 QLIST_FOREACH(uext, &s->unknown_header_ext, next) { 1196 ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); 1197 if (ret < 0) { 1198 goto fail; 1199 } 1200 1201 buf += ret; 1202 buflen -= ret; 1203 } 1204 1205 /* End of header extensions */ 1206 ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen); 1207 if (ret < 0) { 1208 goto fail; 1209 } 1210 1211 buf += ret; 1212 buflen -= ret; 1213 1214 /* Backing file name */ 1215 if (*bs->backing_file) { 1216 
size_t backing_file_len = strlen(bs->backing_file); 1217 1218 if (buflen < backing_file_len) { 1219 ret = -ENOSPC; 1220 goto fail; 1221 } 1222 1223 /* Using strncpy is ok here, since buf is not NUL-terminated. */ 1224 strncpy(buf, bs->backing_file, buflen); 1225 1226 header->backing_file_offset = cpu_to_be64(buf - ((char*) header)); 1227 header->backing_file_size = cpu_to_be32(backing_file_len); 1228 } 1229 1230 /* Write the new header */ 1231 ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size); 1232 if (ret < 0) { 1233 goto fail; 1234 } 1235 1236 ret = 0; 1237 fail: 1238 qemu_vfree(header); 1239 return ret; 1240 } 1241 1242 static int qcow2_change_backing_file(BlockDriverState *bs, 1243 const char *backing_file, const char *backing_fmt) 1244 { 1245 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); 1246 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); 1247 1248 return qcow2_update_header(bs); 1249 } 1250 1251 static int preallocate(BlockDriverState *bs) 1252 { 1253 uint64_t nb_sectors; 1254 uint64_t offset; 1255 uint64_t host_offset = 0; 1256 int num; 1257 int ret; 1258 QCowL2Meta *meta; 1259 1260 nb_sectors = bdrv_getlength(bs) >> 9; 1261 offset = 0; 1262 1263 while (nb_sectors) { 1264 num = MIN(nb_sectors, INT_MAX >> 9); 1265 ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, 1266 &host_offset, &meta); 1267 if (ret < 0) { 1268 return ret; 1269 } 1270 1271 ret = qcow2_alloc_cluster_link_l2(bs, meta); 1272 if (ret < 0) { 1273 qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters, 1274 QCOW2_DISCARD_NEVER); 1275 return ret; 1276 } 1277 1278 /* There are no dependent requests, but we need to remove our request 1279 * from the list of in-flight requests */ 1280 if (meta != NULL) { 1281 QLIST_REMOVE(meta, next_in_flight); 1282 } 1283 1284 /* TODO Preallocate data if requested */ 1285 1286 nb_sectors -= num; 1287 offset += num << 9; 1288 } 1289 1290 /* 1291 * It is expected that the image file is 
large enough to actually contain 1292 * all of the allocated clusters (otherwise we get failing reads after 1293 * EOF). Extend the image to the last allocated sector. 1294 */ 1295 if (host_offset != 0) { 1296 uint8_t buf[512]; 1297 memset(buf, 0, 512); 1298 ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1); 1299 if (ret < 0) { 1300 return ret; 1301 } 1302 } 1303 1304 return 0; 1305 } 1306 1307 static int qcow2_create2(const char *filename, int64_t total_size, 1308 const char *backing_file, const char *backing_format, 1309 int flags, size_t cluster_size, int prealloc, 1310 QEMUOptionParameter *options, int version) 1311 { 1312 /* Calculate cluster_bits */ 1313 int cluster_bits; 1314 cluster_bits = ffs(cluster_size) - 1; 1315 if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || 1316 (1 << cluster_bits) != cluster_size) 1317 { 1318 error_report( 1319 "Cluster size must be a power of two between %d and %dk", 1320 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10)); 1321 return -EINVAL; 1322 } 1323 1324 /* 1325 * Open the image file and write a minimal qcow2 header. 1326 * 1327 * We keep things simple and start with a zero-sized image. We also 1328 * do without refcount blocks or a L1 table for now. We'll fix the 1329 * inconsistency later. 1330 * 1331 * We do need a refcount table because growing the refcount table means 1332 * allocating two new refcount blocks - the seconds of which would be at 1333 * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file 1334 * size for any qcow2 image. 
1335 */ 1336 BlockDriverState* bs; 1337 QCowHeader header; 1338 uint8_t* refcount_table; 1339 int ret; 1340 1341 ret = bdrv_create_file(filename, options); 1342 if (ret < 0) { 1343 return ret; 1344 } 1345 1346 ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR); 1347 if (ret < 0) { 1348 return ret; 1349 } 1350 1351 /* Write the header */ 1352 memset(&header, 0, sizeof(header)); 1353 header.magic = cpu_to_be32(QCOW_MAGIC); 1354 header.version = cpu_to_be32(version); 1355 header.cluster_bits = cpu_to_be32(cluster_bits); 1356 header.size = cpu_to_be64(0); 1357 header.l1_table_offset = cpu_to_be64(0); 1358 header.l1_size = cpu_to_be32(0); 1359 header.refcount_table_offset = cpu_to_be64(cluster_size); 1360 header.refcount_table_clusters = cpu_to_be32(1); 1361 header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT); 1362 header.header_length = cpu_to_be32(sizeof(header)); 1363 1364 if (flags & BLOCK_FLAG_ENCRYPT) { 1365 header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); 1366 } else { 1367 header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 1368 } 1369 1370 if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) { 1371 header.compatible_features |= 1372 cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); 1373 } 1374 1375 ret = bdrv_pwrite(bs, 0, &header, sizeof(header)); 1376 if (ret < 0) { 1377 goto out; 1378 } 1379 1380 /* Write an empty refcount table */ 1381 refcount_table = g_malloc0(cluster_size); 1382 ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size); 1383 g_free(refcount_table); 1384 1385 if (ret < 0) { 1386 goto out; 1387 } 1388 1389 bdrv_close(bs); 1390 1391 /* 1392 * And now open the image and make it consistent first (i.e. 
increase the 1393 * refcount of the cluster that is occupied by the header and the refcount 1394 * table) 1395 */ 1396 BlockDriver* drv = bdrv_find_format("qcow2"); 1397 assert(drv != NULL); 1398 ret = bdrv_open(bs, filename, NULL, 1399 BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv); 1400 if (ret < 0) { 1401 goto out; 1402 } 1403 1404 ret = qcow2_alloc_clusters(bs, 2 * cluster_size); 1405 if (ret < 0) { 1406 goto out; 1407 1408 } else if (ret != 0) { 1409 error_report("Huh, first cluster in empty image is already in use?"); 1410 abort(); 1411 } 1412 1413 /* Okay, now that we have a valid image, let's give it the right size */ 1414 ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE); 1415 if (ret < 0) { 1416 goto out; 1417 } 1418 1419 /* Want a backing file? There you go.*/ 1420 if (backing_file) { 1421 ret = bdrv_change_backing_file(bs, backing_file, backing_format); 1422 if (ret < 0) { 1423 goto out; 1424 } 1425 } 1426 1427 /* And if we're supposed to preallocate metadata, do that now */ 1428 if (prealloc) { 1429 BDRVQcowState *s = bs->opaque; 1430 qemu_co_mutex_lock(&s->lock); 1431 ret = preallocate(bs); 1432 qemu_co_mutex_unlock(&s->lock); 1433 if (ret < 0) { 1434 goto out; 1435 } 1436 } 1437 1438 ret = 0; 1439 out: 1440 bdrv_delete(bs); 1441 return ret; 1442 } 1443 1444 static int qcow2_create(const char *filename, QEMUOptionParameter *options) 1445 { 1446 const char *backing_file = NULL; 1447 const char *backing_fmt = NULL; 1448 uint64_t sectors = 0; 1449 int flags = 0; 1450 size_t cluster_size = DEFAULT_CLUSTER_SIZE; 1451 int prealloc = 0; 1452 int version = 3; 1453 1454 /* Read out options */ 1455 while (options && options->name) { 1456 if (!strcmp(options->name, BLOCK_OPT_SIZE)) { 1457 sectors = options->value.n / 512; 1458 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { 1459 backing_file = options->value.s; 1460 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) { 1461 backing_fmt = options->value.s; 1462 } else if 
(!strcmp(options->name, BLOCK_OPT_ENCRYPT)) { 1463 flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0; 1464 } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) { 1465 if (options->value.n) { 1466 cluster_size = options->value.n; 1467 } 1468 } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { 1469 if (!options->value.s || !strcmp(options->value.s, "off")) { 1470 prealloc = 0; 1471 } else if (!strcmp(options->value.s, "metadata")) { 1472 prealloc = 1; 1473 } else { 1474 fprintf(stderr, "Invalid preallocation mode: '%s'\n", 1475 options->value.s); 1476 return -EINVAL; 1477 } 1478 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) { 1479 if (!options->value.s) { 1480 /* keep the default */ 1481 } else if (!strcmp(options->value.s, "0.10")) { 1482 version = 2; 1483 } else if (!strcmp(options->value.s, "1.1")) { 1484 version = 3; 1485 } else { 1486 fprintf(stderr, "Invalid compatibility level: '%s'\n", 1487 options->value.s); 1488 return -EINVAL; 1489 } 1490 } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) { 1491 flags |= options->value.n ? 
BLOCK_FLAG_LAZY_REFCOUNTS : 0; 1492 } 1493 options++; 1494 } 1495 1496 if (backing_file && prealloc) { 1497 fprintf(stderr, "Backing file and preallocation cannot be used at " 1498 "the same time\n"); 1499 return -EINVAL; 1500 } 1501 1502 if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) { 1503 fprintf(stderr, "Lazy refcounts only supported with compatibility " 1504 "level 1.1 and above (use compat=1.1 or greater)\n"); 1505 return -EINVAL; 1506 } 1507 1508 return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags, 1509 cluster_size, prealloc, options, version); 1510 } 1511 1512 static int qcow2_make_empty(BlockDriverState *bs) 1513 { 1514 #if 0 1515 /* XXX: not correct */ 1516 BDRVQcowState *s = bs->opaque; 1517 uint32_t l1_length = s->l1_size * sizeof(uint64_t); 1518 int ret; 1519 1520 memset(s->l1_table, 0, l1_length); 1521 if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0) 1522 return -1; 1523 ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length); 1524 if (ret < 0) 1525 return ret; 1526 1527 l2_cache_reset(bs); 1528 #endif 1529 return 0; 1530 } 1531 1532 static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs, 1533 int64_t sector_num, int nb_sectors) 1534 { 1535 int ret; 1536 BDRVQcowState *s = bs->opaque; 1537 1538 /* Emulate misaligned zero writes */ 1539 if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) { 1540 return -ENOTSUP; 1541 } 1542 1543 /* Whatever is left can use real zero clusters */ 1544 qemu_co_mutex_lock(&s->lock); 1545 ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS, 1546 nb_sectors); 1547 qemu_co_mutex_unlock(&s->lock); 1548 1549 return ret; 1550 } 1551 1552 static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, 1553 int64_t sector_num, int nb_sectors) 1554 { 1555 int ret; 1556 BDRVQcowState *s = bs->opaque; 1557 1558 qemu_co_mutex_lock(&s->lock); 1559 ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, 1560 nb_sectors); 1561 
qemu_co_mutex_unlock(&s->lock); 1562 return ret; 1563 } 1564 1565 static int qcow2_truncate(BlockDriverState *bs, int64_t offset) 1566 { 1567 BDRVQcowState *s = bs->opaque; 1568 int64_t new_l1_size; 1569 int ret; 1570 1571 if (offset & 511) { 1572 error_report("The new size must be a multiple of 512"); 1573 return -EINVAL; 1574 } 1575 1576 /* cannot proceed if image has snapshots */ 1577 if (s->nb_snapshots) { 1578 error_report("Can't resize an image which has snapshots"); 1579 return -ENOTSUP; 1580 } 1581 1582 /* shrinking is currently not supported */ 1583 if (offset < bs->total_sectors * 512) { 1584 error_report("qcow2 doesn't support shrinking images yet"); 1585 return -ENOTSUP; 1586 } 1587 1588 new_l1_size = size_to_l1(s, offset); 1589 ret = qcow2_grow_l1_table(bs, new_l1_size, true); 1590 if (ret < 0) { 1591 return ret; 1592 } 1593 1594 /* write updated header.size */ 1595 offset = cpu_to_be64(offset); 1596 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), 1597 &offset, sizeof(uint64_t)); 1598 if (ret < 0) { 1599 return ret; 1600 } 1601 1602 s->l1_vm_state_index = new_l1_size; 1603 return 0; 1604 } 1605 1606 /* XXX: put compressed sectors first, then all the cluster aligned 1607 tables to avoid losing bytes in alignment */ 1608 static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, 1609 const uint8_t *buf, int nb_sectors) 1610 { 1611 BDRVQcowState *s = bs->opaque; 1612 z_stream strm; 1613 int ret, out_len; 1614 uint8_t *out_buf; 1615 uint64_t cluster_offset; 1616 1617 if (nb_sectors == 0) { 1618 /* align end of file to a sector boundary to ease reading with 1619 sector based I/Os */ 1620 cluster_offset = bdrv_getlength(bs->file); 1621 cluster_offset = (cluster_offset + 511) & ~511; 1622 bdrv_truncate(bs->file, cluster_offset); 1623 return 0; 1624 } 1625 1626 if (nb_sectors != s->cluster_sectors) { 1627 ret = -EINVAL; 1628 1629 /* Zero-pad last write if image size is not cluster aligned */ 1630 if (sector_num + nb_sectors == 
bs->total_sectors && 1631 nb_sectors < s->cluster_sectors) { 1632 uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size); 1633 memset(pad_buf, 0, s->cluster_size); 1634 memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE); 1635 ret = qcow2_write_compressed(bs, sector_num, 1636 pad_buf, s->cluster_sectors); 1637 qemu_vfree(pad_buf); 1638 } 1639 return ret; 1640 } 1641 1642 out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); 1643 1644 /* best compression, small window, no zlib header */ 1645 memset(&strm, 0, sizeof(strm)); 1646 ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, 1647 Z_DEFLATED, -12, 1648 9, Z_DEFAULT_STRATEGY); 1649 if (ret != 0) { 1650 ret = -EINVAL; 1651 goto fail; 1652 } 1653 1654 strm.avail_in = s->cluster_size; 1655 strm.next_in = (uint8_t *)buf; 1656 strm.avail_out = s->cluster_size; 1657 strm.next_out = out_buf; 1658 1659 ret = deflate(&strm, Z_FINISH); 1660 if (ret != Z_STREAM_END && ret != Z_OK) { 1661 deflateEnd(&strm); 1662 ret = -EINVAL; 1663 goto fail; 1664 } 1665 out_len = strm.next_out - out_buf; 1666 1667 deflateEnd(&strm); 1668 1669 if (ret != Z_STREAM_END || out_len >= s->cluster_size) { 1670 /* could not compress: write normal cluster */ 1671 ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors); 1672 if (ret < 0) { 1673 goto fail; 1674 } 1675 } else { 1676 cluster_offset = qcow2_alloc_compressed_cluster_offset(bs, 1677 sector_num << 9, out_len); 1678 if (!cluster_offset) { 1679 ret = -EIO; 1680 goto fail; 1681 } 1682 cluster_offset &= s->cluster_offset_mask; 1683 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); 1684 ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len); 1685 if (ret < 0) { 1686 goto fail; 1687 } 1688 } 1689 1690 ret = 0; 1691 fail: 1692 g_free(out_buf); 1693 return ret; 1694 } 1695 1696 static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) 1697 { 1698 BDRVQcowState *s = bs->opaque; 1699 int ret; 1700 1701 qemu_co_mutex_lock(&s->lock); 1702 ret = qcow2_cache_flush(bs, 
s->l2_table_cache); 1703 if (ret < 0) { 1704 qemu_co_mutex_unlock(&s->lock); 1705 return ret; 1706 } 1707 1708 if (qcow2_need_accurate_refcounts(s)) { 1709 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 1710 if (ret < 0) { 1711 qemu_co_mutex_unlock(&s->lock); 1712 return ret; 1713 } 1714 } 1715 qemu_co_mutex_unlock(&s->lock); 1716 1717 return 0; 1718 } 1719 1720 static int64_t qcow2_vm_state_offset(BDRVQcowState *s) 1721 { 1722 return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits); 1723 } 1724 1725 static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 1726 { 1727 BDRVQcowState *s = bs->opaque; 1728 bdi->cluster_size = s->cluster_size; 1729 bdi->vm_state_offset = qcow2_vm_state_offset(s); 1730 return 0; 1731 } 1732 1733 #if 0 1734 static void dump_refcounts(BlockDriverState *bs) 1735 { 1736 BDRVQcowState *s = bs->opaque; 1737 int64_t nb_clusters, k, k1, size; 1738 int refcount; 1739 1740 size = bdrv_getlength(bs->file); 1741 nb_clusters = size_to_clusters(s, size); 1742 for(k = 0; k < nb_clusters;) { 1743 k1 = k; 1744 refcount = get_refcount(bs, k); 1745 k++; 1746 while (k < nb_clusters && get_refcount(bs, k) == refcount) 1747 k++; 1748 printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount, 1749 k - k1); 1750 } 1751 } 1752 #endif 1753 1754 static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, 1755 int64_t pos) 1756 { 1757 BDRVQcowState *s = bs->opaque; 1758 int growable = bs->growable; 1759 int ret; 1760 1761 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); 1762 bs->growable = 1; 1763 ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov); 1764 bs->growable = growable; 1765 1766 return ret; 1767 } 1768 1769 static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, 1770 int64_t pos, int size) 1771 { 1772 BDRVQcowState *s = bs->opaque; 1773 int growable = bs->growable; 1774 bool zero_beyond_eof = bs->zero_beyond_eof; 1775 int ret; 1776 1777 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); 1778 
bs->growable = 1; 1779 bs->zero_beyond_eof = false; 1780 ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size); 1781 bs->growable = growable; 1782 bs->zero_beyond_eof = zero_beyond_eof; 1783 1784 return ret; 1785 } 1786 1787 static QEMUOptionParameter qcow2_create_options[] = { 1788 { 1789 .name = BLOCK_OPT_SIZE, 1790 .type = OPT_SIZE, 1791 .help = "Virtual disk size" 1792 }, 1793 { 1794 .name = BLOCK_OPT_COMPAT_LEVEL, 1795 .type = OPT_STRING, 1796 .help = "Compatibility level (0.10 or 1.1)" 1797 }, 1798 { 1799 .name = BLOCK_OPT_BACKING_FILE, 1800 .type = OPT_STRING, 1801 .help = "File name of a base image" 1802 }, 1803 { 1804 .name = BLOCK_OPT_BACKING_FMT, 1805 .type = OPT_STRING, 1806 .help = "Image format of the base image" 1807 }, 1808 { 1809 .name = BLOCK_OPT_ENCRYPT, 1810 .type = OPT_FLAG, 1811 .help = "Encrypt the image" 1812 }, 1813 { 1814 .name = BLOCK_OPT_CLUSTER_SIZE, 1815 .type = OPT_SIZE, 1816 .help = "qcow2 cluster size", 1817 .value = { .n = DEFAULT_CLUSTER_SIZE }, 1818 }, 1819 { 1820 .name = BLOCK_OPT_PREALLOC, 1821 .type = OPT_STRING, 1822 .help = "Preallocation mode (allowed values: off, metadata)" 1823 }, 1824 { 1825 .name = BLOCK_OPT_LAZY_REFCOUNTS, 1826 .type = OPT_FLAG, 1827 .help = "Postpone refcount updates", 1828 }, 1829 { NULL } 1830 }; 1831 1832 static BlockDriver bdrv_qcow2 = { 1833 .format_name = "qcow2", 1834 .instance_size = sizeof(BDRVQcowState), 1835 .bdrv_probe = qcow2_probe, 1836 .bdrv_open = qcow2_open, 1837 .bdrv_close = qcow2_close, 1838 .bdrv_reopen_prepare = qcow2_reopen_prepare, 1839 .bdrv_create = qcow2_create, 1840 .bdrv_has_zero_init = bdrv_has_zero_init_1, 1841 .bdrv_co_is_allocated = qcow2_co_is_allocated, 1842 .bdrv_set_key = qcow2_set_key, 1843 .bdrv_make_empty = qcow2_make_empty, 1844 1845 .bdrv_co_readv = qcow2_co_readv, 1846 .bdrv_co_writev = qcow2_co_writev, 1847 .bdrv_co_flush_to_os = qcow2_co_flush_to_os, 1848 1849 .bdrv_co_write_zeroes = qcow2_co_write_zeroes, 1850 .bdrv_co_discard = qcow2_co_discard, 
1851 .bdrv_truncate = qcow2_truncate, 1852 .bdrv_write_compressed = qcow2_write_compressed, 1853 1854 .bdrv_snapshot_create = qcow2_snapshot_create, 1855 .bdrv_snapshot_goto = qcow2_snapshot_goto, 1856 .bdrv_snapshot_delete = qcow2_snapshot_delete, 1857 .bdrv_snapshot_list = qcow2_snapshot_list, 1858 .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, 1859 .bdrv_get_info = qcow2_get_info, 1860 1861 .bdrv_save_vmstate = qcow2_save_vmstate, 1862 .bdrv_load_vmstate = qcow2_load_vmstate, 1863 1864 .bdrv_change_backing_file = qcow2_change_backing_file, 1865 1866 .bdrv_invalidate_cache = qcow2_invalidate_cache, 1867 1868 .create_options = qcow2_create_options, 1869 .bdrv_check = qcow2_check, 1870 }; 1871 1872 static void bdrv_qcow2_init(void) 1873 { 1874 bdrv_register(&bdrv_qcow2); 1875 } 1876 1877 block_init(bdrv_qcow2_init); 1878