1 /* 2 * Block driver for the QCOW format 3 * 4 * Copyright (c) 2004-2006 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "qemu/osdep.h" 26 #include "qapi/error.h" 27 #include "qemu/error-report.h" 28 #include "block/block_int.h" 29 #include "block/qdict.h" 30 #include "sysemu/block-backend.h" 31 #include "qemu/module.h" 32 #include "qemu/option.h" 33 #include "qemu/bswap.h" 34 #include "qemu/cutils.h" 35 #include "qemu/memalign.h" 36 #include <zlib.h> 37 #include "qapi/qmp/qdict.h" 38 #include "qapi/qmp/qstring.h" 39 #include "qapi/qobject-input-visitor.h" 40 #include "qapi/qapi-visit-block-core.h" 41 #include "crypto/block.h" 42 #include "migration/blocker.h" 43 #include "crypto.h" 44 45 /**************************************************************/ 46 /* QEMU COW block driver with compression and encryption support */ 47 48 #define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) 49 #define QCOW_VERSION 1 50 51 #define QCOW_CRYPT_NONE 0 52 #define QCOW_CRYPT_AES 1 53 54 #define QCOW_OFLAG_COMPRESSED (1LL << 63) 55 56 typedef struct QCowHeader { 57 uint32_t magic; 58 uint32_t version; 59 uint64_t backing_file_offset; 60 uint32_t backing_file_size; 61 uint32_t mtime; 62 uint64_t size; /* in bytes */ 63 uint8_t cluster_bits; 64 uint8_t l2_bits; 65 uint16_t padding; 66 uint32_t crypt_method; 67 uint64_t l1_table_offset; 68 } QEMU_PACKED QCowHeader; 69 70 #define L2_CACHE_SIZE 16 71 72 typedef struct BDRVQcowState { 73 int cluster_bits; 74 int cluster_size; 75 int l2_bits; 76 int l2_size; 77 unsigned int l1_size; 78 uint64_t cluster_offset_mask; 79 uint64_t l1_table_offset; 80 uint64_t *l1_table; 81 uint64_t *l2_cache; 82 uint64_t l2_cache_offsets[L2_CACHE_SIZE]; 83 uint32_t l2_cache_counts[L2_CACHE_SIZE]; 84 uint8_t *cluster_cache; 85 uint8_t *cluster_data; 86 uint64_t cluster_cache_offset; 87 QCryptoBlock *crypto; /* Disk encryption format driver */ 88 uint32_t crypt_method_header; 89 CoMutex lock; 90 Error *migration_blocker; 91 } BDRVQcowState; 92 93 static QemuOptsList qcow_create_opts; 94 95 static int coroutine_fn GRAPH_RDLOCK 96 decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); 97 98 static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) 99 { 100 const QCowHeader *cow_header = (const void *)buf; 101 102 if (buf_size >= sizeof(QCowHeader) && 103 be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 104 be32_to_cpu(cow_header->version) == QCOW_VERSION) 105 return 100; 106 else 107 return 0; 108 } 109 110 static int qcow_open(BlockDriverState *bs, QDict *options, int flags, 111 Error **errp) 112 { 113 BDRVQcowState *s = bs->opaque; 114 unsigned int len, i, shift; 115 int ret; 116 QCowHeader header; 117 QCryptoBlockOpenOptions *crypto_opts = NULL; 118 unsigned int cflags = 0; 119 QDict *encryptopts = NULL; 120 const char *encryptfmt; 121 122 qdict_extract_subqdict(options, &encryptopts, "encrypt."); 123 encryptfmt = qdict_get_try_str(encryptopts, "format"); 124 125 ret = bdrv_open_file_child(NULL, options, "file", bs, errp); 126 if (ret < 0) { 127 goto fail_unlocked; 128 } 129 130 bdrv_graph_rdlock_main_loop(); 131 132 ret = bdrv_pread(bs->file, 0, sizeof(header), &header, 0); 133 if (ret < 0) { 134 goto fail; 135 } 136 header.magic = be32_to_cpu(header.magic); 137 header.version = be32_to_cpu(header.version); 138 header.backing_file_offset = be64_to_cpu(header.backing_file_offset); 139 header.backing_file_size = be32_to_cpu(header.backing_file_size); 140 header.mtime = be32_to_cpu(header.mtime); 141 header.size = be64_to_cpu(header.size); 142 header.crypt_method = be32_to_cpu(header.crypt_method); 143 header.l1_table_offset = be64_to_cpu(header.l1_table_offset); 144 145 if (header.magic != QCOW_MAGIC) { 146 error_setg(errp, "Image not in qcow format"); 147 ret = -EINVAL; 148 goto fail; 149 } 150 if (header.version != QCOW_VERSION) { 151 error_setg(errp, "qcow (v%d) does not support qcow version %" PRIu32, 152 QCOW_VERSION, header.version); 153 if (header.version == 2 || header.version == 3) { 154 error_append_hint(errp, "Try the 'qcow2' driver instead.\n"); 155 } 156 157 ret = -ENOTSUP; 158 goto fail; 159 } 160 161 if (header.size <= 1) { 162 error_setg(errp, "Image size is too small (must be at least 2 bytes)"); 163 ret = -EINVAL; 164 goto fail; 165 } 166 if (header.cluster_bits < 9 || header.cluster_bits > 16) { 167 error_setg(errp, "Cluster size must be between 512 and 64k"); 168 ret = -EINVAL; 169 goto fail; 170 } 171 172 /* l2_bits specifies number of entries; storing a uint64_t in each entry, 173 * so bytes = num_entries << 3. */ 174 if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3) { 175 error_setg(errp, "L2 table size must be between 512 and 64k"); 176 ret = -EINVAL; 177 goto fail; 178 } 179 180 s->crypt_method_header = header.crypt_method; 181 if (s->crypt_method_header) { 182 if (bdrv_uses_whitelist() && 183 s->crypt_method_header == QCOW_CRYPT_AES) { 184 error_setg(errp, 185 "Use of AES-CBC encrypted qcow images is no longer " 186 "supported in system emulators"); 187 error_append_hint(errp, 188 "You can use 'qemu-img convert' to convert your " 189 "image to an alternative supported format, such " 190 "as unencrypted qcow, or raw with the LUKS " 191 "format instead.\n"); 192 ret = -ENOSYS; 193 goto fail; 194 } 195 if (s->crypt_method_header == QCOW_CRYPT_AES) { 196 if (encryptfmt && !g_str_equal(encryptfmt, "aes")) { 197 error_setg(errp, 198 "Header reported 'aes' encryption format but " 199 "options specify '%s'", encryptfmt); 200 ret = -EINVAL; 201 goto fail; 202 } 203 qdict_put_str(encryptopts, "format", "qcow"); 204 crypto_opts = block_crypto_open_opts_init(encryptopts, errp); 205 if (!crypto_opts) { 206 ret = -EINVAL; 207 goto fail; 208 } 209 210 if (flags & BDRV_O_NO_IO) { 211 cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; 212 } 213 s->crypto = qcrypto_block_open(crypto_opts, "encrypt.", 214 NULL, NULL, cflags, errp); 215 if (!s->crypto) { 216 ret = -EINVAL; 217 goto fail; 218 } 219 } else { 220 error_setg(errp, "invalid encryption method in qcow header"); 221 ret = -EINVAL; 222 goto fail; 223 } 224 bs->encrypted = true; 225 } else { 226 if (encryptfmt) { 227 error_setg(errp, "No encryption in image header, but options " 228 "specified format '%s'", encryptfmt); 229 ret = -EINVAL; 230 goto fail; 231 } 232 } 233 s->cluster_bits = header.cluster_bits; 234 s->cluster_size = 1 << s->cluster_bits; 235 s->l2_bits = header.l2_bits; 236 s->l2_size = 1 << s->l2_bits; 237 bs->total_sectors = header.size / 512; 238 s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1; 239 240 /* read the level 1 table */ 241 shift = s->cluster_bits + s->l2_bits; 242 if (header.size > UINT64_MAX - (1LL << shift)) { 243 error_setg(errp, "Image too large"); 244 ret = -EINVAL; 245 goto fail; 246 } else { 247 uint64_t l1_size = (header.size + (1LL << shift) - 1) >> shift; 248 if (l1_size > INT_MAX / sizeof(uint64_t)) { 249 error_setg(errp, "Image too large"); 250 ret = -EINVAL; 251 goto fail; 252 } 253 s->l1_size = l1_size; 254 } 255 256 s->l1_table_offset = header.l1_table_offset; 257 s->l1_table = g_try_new(uint64_t, s->l1_size); 258 if (s->l1_table == NULL) { 259 error_setg(errp, "Could not allocate memory for L1 table"); 260 ret = -ENOMEM; 261 goto fail; 262 } 263 264 ret = bdrv_pread(bs->file, s->l1_table_offset, 265 s->l1_size * sizeof(uint64_t), s->l1_table, 0); 266 if (ret < 0) { 267 goto fail; 268 } 269 270 for(i = 0;i < s->l1_size; i++) { 271 s->l1_table[i] = be64_to_cpu(s->l1_table[i]); 272 } 273 274 /* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */ 275 s->l2_cache = 276 qemu_try_blockalign(bs->file->bs, 277 s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); 278 if (s->l2_cache == NULL) { 279 error_setg(errp, "Could not allocate L2 table cache"); 280 ret = -ENOMEM; 281 goto fail; 282 } 283 s->cluster_cache = g_malloc(s->cluster_size); 284 s->cluster_data = g_malloc(s->cluster_size); 285 s->cluster_cache_offset = -1; 286 287 /* read the backing file name */ 288 if (header.backing_file_offset != 0) { 289 len = header.backing_file_size; 290 if (len > 1023 || len >= sizeof(bs->backing_file)) { 291 error_setg(errp, "Backing file name too long"); 292 ret = -EINVAL; 293 goto fail; 294 } 295 ret = bdrv_pread(bs->file, header.backing_file_offset, len, 296 bs->auto_backing_file, 0); 297 if (ret < 0) { 298 goto fail; 299 } 300 bs->auto_backing_file[len] = '\0'; 301 pstrcpy(bs->backing_file, sizeof(bs->backing_file), 302 bs->auto_backing_file); 303 } 304 305 /* Disable migration when qcow images are used */ 306 error_setg(&s->migration_blocker, "The qcow format used by node '%s' " 307 "does not support live migration", 308 bdrv_get_device_or_node_name(bs)); 309 310 ret = migrate_add_blocker_normal(&s->migration_blocker, errp); 311 if (ret < 0) { 312 goto fail; 313 } 314 315 qobject_unref(encryptopts); 316 qapi_free_QCryptoBlockOpenOptions(crypto_opts); 317 qemu_co_mutex_init(&s->lock); 318 bdrv_graph_rdunlock_main_loop(); 319 return 0; 320 321 fail: 322 bdrv_graph_rdunlock_main_loop(); 323 fail_unlocked: 324 g_free(s->l1_table); 325 qemu_vfree(s->l2_cache); 326 g_free(s->cluster_cache); 327 g_free(s->cluster_data); 328 qcrypto_block_free(s->crypto); 329 qobject_unref(encryptopts); 330 qapi_free_QCryptoBlockOpenOptions(crypto_opts); 331 return ret; 332 } 333 334 335 /* We have nothing to do for QCOW reopen, stubs just return 336 * success */ 337 static int qcow_reopen_prepare(BDRVReopenState *state, 338 BlockReopenQueue *queue, Error **errp) 339 { 340 return 0; 341 } 342 343 344 /* 'allocate' is: 345 * 346 * 0 to not allocate. 347 * 348 * 1 to allocate a normal cluster (for sector-aligned byte offsets 'n_start' 349 * to 'n_end' within the cluster) 350 * 351 * 2 to allocate a compressed cluster of size 352 * 'compressed_size'. 'compressed_size' must be > 0 and < 353 * cluster_size 354 * 355 * return 0 if not allocated, 1 if *result is assigned, and negative 356 * errno on failure. 357 */ 358 static int coroutine_fn GRAPH_RDLOCK 359 get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate, 360 int compressed_size, int n_start, int n_end, 361 uint64_t *result) 362 { 363 BDRVQcowState *s = bs->opaque; 364 int min_index, i, j, l1_index, l2_index, ret; 365 int64_t l2_offset; 366 uint64_t *l2_table, cluster_offset, tmp; 367 uint32_t min_count; 368 int new_l2_table; 369 370 *result = 0; 371 l1_index = offset >> (s->l2_bits + s->cluster_bits); 372 l2_offset = s->l1_table[l1_index]; 373 new_l2_table = 0; 374 if (!l2_offset) { 375 if (!allocate) 376 return 0; 377 /* allocate a new l2 entry */ 378 l2_offset = bdrv_co_getlength(bs->file->bs); 379 if (l2_offset < 0) { 380 return l2_offset; 381 } 382 /* round to cluster size */ 383 l2_offset = QEMU_ALIGN_UP(l2_offset, s->cluster_size); 384 /* update the L1 entry */ 385 s->l1_table[l1_index] = l2_offset; 386 tmp = cpu_to_be64(l2_offset); 387 BLKDBG_CO_EVENT(bs->file, BLKDBG_L1_UPDATE); 388 ret = bdrv_co_pwrite_sync(bs->file, 389 s->l1_table_offset + l1_index * sizeof(tmp), 390 sizeof(tmp), &tmp, 0); 391 if (ret < 0) { 392 return ret; 393 } 394 new_l2_table = 1; 395 } 396 for(i = 0; i < L2_CACHE_SIZE; i++) { 397 if (l2_offset == s->l2_cache_offsets[i]) { 398 /* increment the hit count */ 399 if (++s->l2_cache_counts[i] == 0xffffffff) { 400 for(j = 0; j < L2_CACHE_SIZE; j++) { 401 s->l2_cache_counts[j] >>= 1; 402 } 403 } 404 l2_table = s->l2_cache + (i << s->l2_bits); 405 goto found; 406 } 407 } 408 /* not found: load a new entry in the least used one */ 409 min_index = 0; 410 min_count = 0xffffffff; 411 for(i = 0; i < L2_CACHE_SIZE; i++) { 412 if (s->l2_cache_counts[i] < min_count) { 413 min_count = s->l2_cache_counts[i]; 414 min_index = i; 415 } 416 } 417 l2_table = s->l2_cache + (min_index << s->l2_bits); 418 BLKDBG_CO_EVENT(bs->file, BLKDBG_L2_LOAD); 419 if (new_l2_table) { 420 memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); 421 ret = bdrv_co_pwrite_sync(bs->file, l2_offset, 422 s->l2_size * sizeof(uint64_t), l2_table, 0); 423 if (ret < 0) { 424 return ret; 425 } 426 } else { 427 ret = bdrv_co_pread(bs->file, l2_offset, 428 s->l2_size * sizeof(uint64_t), l2_table, 0); 429 if (ret < 0) { 430 return ret; 431 } 432 } 433 s->l2_cache_offsets[min_index] = l2_offset; 434 s->l2_cache_counts[min_index] = 1; 435 found: 436 l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); 437 cluster_offset = be64_to_cpu(l2_table[l2_index]); 438 if (!cluster_offset || 439 ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) { 440 if (!allocate) 441 return 0; 442 BLKDBG_CO_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC); 443 assert(QEMU_IS_ALIGNED(n_start | n_end, BDRV_SECTOR_SIZE)); 444 /* allocate a new cluster */ 445 if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && 446 (n_end - n_start) < s->cluster_size) { 447 /* if the cluster is already compressed, we must 448 decompress it in the case it is not completely 449 overwritten */ 450 if (decompress_cluster(bs, cluster_offset) < 0) { 451 return -EIO; 452 } 453 cluster_offset = bdrv_co_getlength(bs->file->bs); 454 if ((int64_t) cluster_offset < 0) { 455 return cluster_offset; 456 } 457 cluster_offset = QEMU_ALIGN_UP(cluster_offset, s->cluster_size); 458 /* write the cluster content */ 459 BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO); 460 ret = bdrv_co_pwrite(bs->file, cluster_offset, s->cluster_size, 461 s->cluster_cache, 0); 462 if (ret < 0) { 463 return ret; 464 } 465 } else { 466 cluster_offset = bdrv_co_getlength(bs->file->bs); 467 if ((int64_t) cluster_offset < 0) { 468 return cluster_offset; 469 } 470 if (allocate == 1) { 471 /* round to cluster size */ 472 cluster_offset = QEMU_ALIGN_UP(cluster_offset, s->cluster_size); 473 if (cluster_offset + s->cluster_size > INT64_MAX) { 474 return -E2BIG; 475 } 476 ret = bdrv_co_truncate(bs->file, 477 cluster_offset + s->cluster_size, 478 false, PREALLOC_MODE_OFF, 0, NULL); 479 if (ret < 0) { 480 return ret; 481 } 482 /* if encrypted, we must initialize the cluster 483 content which won't be written */ 484 if (bs->encrypted && 485 (n_end - n_start) < s->cluster_size) { 486 uint64_t start_offset; 487 assert(s->crypto); 488 start_offset = offset & ~(s->cluster_size - 1); 489 for (i = 0; i < s->cluster_size; i += BDRV_SECTOR_SIZE) { 490 if (i < n_start || i >= n_end) { 491 memset(s->cluster_data, 0x00, BDRV_SECTOR_SIZE); 492 if (qcrypto_block_encrypt(s->crypto, 493 start_offset + i, 494 s->cluster_data, 495 BDRV_SECTOR_SIZE, 496 NULL) < 0) { 497 return -EIO; 498 } 499 BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO); 500 ret = bdrv_co_pwrite(bs->file, cluster_offset + i, 501 BDRV_SECTOR_SIZE, 502 s->cluster_data, 0); 503 if (ret < 0) { 504 return ret; 505 } 506 } 507 } 508 } 509 } else if (allocate == 2) { 510 cluster_offset |= QCOW_OFLAG_COMPRESSED | 511 (uint64_t)compressed_size << (63 - s->cluster_bits); 512 } 513 } 514 /* update L2 table */ 515 tmp = cpu_to_be64(cluster_offset); 516 l2_table[l2_index] = tmp; 517 if (allocate == 2) { 518 BLKDBG_CO_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED); 519 } else { 520 BLKDBG_CO_EVENT(bs->file, BLKDBG_L2_UPDATE); 521 } 522 ret = bdrv_co_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp), 523 sizeof(tmp), &tmp, 0); 524 if (ret < 0) { 525 return ret; 526 } 527 } 528 *result = cluster_offset; 529 return 1; 530 } 531 532 static int coroutine_fn GRAPH_RDLOCK 533 qcow_co_block_status(BlockDriverState *bs, bool want_zero, 534 int64_t offset, int64_t bytes, int64_t *pnum, 535 int64_t *map, BlockDriverState **file) 536 { 537 BDRVQcowState *s = bs->opaque; 538 int index_in_cluster, ret; 539 int64_t n; 540 uint64_t cluster_offset; 541 542 qemu_co_mutex_lock(&s->lock); 543 ret = get_cluster_offset(bs, offset, 0, 0, 0, 0, &cluster_offset); 544 qemu_co_mutex_unlock(&s->lock); 545 if (ret < 0) { 546 return ret; 547 } 548 index_in_cluster = offset & (s->cluster_size - 1); 549 n = s->cluster_size - index_in_cluster; 550 if (n > bytes) { 551 n = bytes; 552 } 553 *pnum = n; 554 if (!cluster_offset) { 555 return 0; 556 } 557 if (cluster_offset & QCOW_OFLAG_COMPRESSED) { 558 return BDRV_BLOCK_DATA | BDRV_BLOCK_COMPRESSED; 559 } 560 if (s->crypto) { 561 return BDRV_BLOCK_DATA; 562 } 563 *map = cluster_offset | index_in_cluster; 564 *file = bs->file->bs; 565 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; 566 } 567 568 static int decompress_buffer(uint8_t *out_buf, int out_buf_size, 569 const uint8_t *buf, int buf_size) 570 { 571 z_stream strm1, *strm = &strm1; 572 int ret, out_len; 573 574 memset(strm, 0, sizeof(*strm)); 575 576 strm->next_in = (uint8_t *)buf; 577 strm->avail_in = buf_size; 578 strm->next_out = out_buf; 579 strm->avail_out = out_buf_size; 580 581 ret = inflateInit2(strm, -12); 582 if (ret != Z_OK) 583 return -1; 584 ret = inflate(strm, Z_FINISH); 585 out_len = strm->next_out - out_buf; 586 if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || 587 out_len != out_buf_size) { 588 inflateEnd(strm); 589 return -1; 590 } 591 inflateEnd(strm); 592 return 0; 593 } 594 595 static int coroutine_fn GRAPH_RDLOCK 596 decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) 597 { 598 BDRVQcowState *s = bs->opaque; 599 int ret, csize; 600 uint64_t coffset; 601 602 coffset = cluster_offset & s->cluster_offset_mask; 603 if (s->cluster_cache_offset != coffset) { 604 csize = cluster_offset >> (63 - s->cluster_bits); 605 csize &= (s->cluster_size - 1); 606 BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_COMPRESSED); 607 ret = bdrv_co_pread(bs->file, coffset, csize, s->cluster_data, 0); 608 if (ret < 0) 609 return -1; 610 if (decompress_buffer(s->cluster_cache, s->cluster_size, 611 s->cluster_data, csize) < 0) { 612 return -1; 613 } 614 s->cluster_cache_offset = coffset; 615 } 616 return 0; 617 } 618 619 static void qcow_refresh_limits(BlockDriverState *bs, Error **errp) 620 { 621 /* At least encrypted images require 512-byte alignment. Apply the 622 * limit universally, rather than just on encrypted images, as 623 * it's easier to let the block layer handle rounding than to 624 * audit this code further. */ 625 bs->bl.request_alignment = BDRV_SECTOR_SIZE; 626 } 627 628 static int coroutine_fn GRAPH_RDLOCK 629 qcow_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, 630 QEMUIOVector *qiov, BdrvRequestFlags flags) 631 { 632 BDRVQcowState *s = bs->opaque; 633 int offset_in_cluster; 634 int ret = 0, n; 635 uint64_t cluster_offset; 636 uint8_t *buf; 637 void *orig_buf; 638 639 if (qiov->niov > 1) { 640 buf = orig_buf = qemu_try_blockalign(bs, qiov->size); 641 if (buf == NULL) { 642 return -ENOMEM; 643 } 644 } else { 645 orig_buf = NULL; 646 buf = (uint8_t *)qiov->iov->iov_base; 647 } 648 649 qemu_co_mutex_lock(&s->lock); 650 651 while (bytes != 0) { 652 /* prepare next request */ 653 ret = get_cluster_offset(bs, offset, 0, 0, 0, 0, &cluster_offset); 654 if (ret < 0) { 655 break; 656 } 657 offset_in_cluster = offset & (s->cluster_size - 1); 658 n = s->cluster_size - offset_in_cluster; 659 if (n > bytes) { 660 n = bytes; 661 } 662 663 if (!cluster_offset) { 664 if (bs->backing) { 665 /* read from the base image */ 666 qemu_co_mutex_unlock(&s->lock); 667 /* qcow2 emits this on bs->file instead of bs->backing */ 668 BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); 669 ret = bdrv_co_pread(bs->backing, offset, n, buf, 0); 670 qemu_co_mutex_lock(&s->lock); 671 if (ret < 0) { 672 break; 673 } 674 } else { 675 /* Note: in this case, no need to wait */ 676 memset(buf, 0, n); 677 } 678 } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { 679 /* add AIO support for compressed blocks ? */ 680 if (decompress_cluster(bs, cluster_offset) < 0) { 681 ret = -EIO; 682 break; 683 } 684 memcpy(buf, s->cluster_cache + offset_in_cluster, n); 685 } else { 686 if ((cluster_offset & 511) != 0) { 687 ret = -EIO; 688 break; 689 } 690 qemu_co_mutex_unlock(&s->lock); 691 BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO); 692 ret = bdrv_co_pread(bs->file, cluster_offset + offset_in_cluster, 693 n, buf, 0); 694 qemu_co_mutex_lock(&s->lock); 695 if (ret < 0) { 696 break; 697 } 698 if (bs->encrypted) { 699 assert(s->crypto); 700 if (qcrypto_block_decrypt(s->crypto, 701 offset, buf, n, NULL) < 0) { 702 ret = -EIO; 703 break; 704 } 705 } 706 } 707 ret = 0; 708 709 bytes -= n; 710 offset += n; 711 buf += n; 712 } 713 714 qemu_co_mutex_unlock(&s->lock); 715 716 if (qiov->niov > 1) { 717 qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size); 718 qemu_vfree(orig_buf); 719 } 720 721 return ret; 722 } 723 724 static int coroutine_fn GRAPH_RDLOCK 725 qcow_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, 726 QEMUIOVector *qiov, BdrvRequestFlags flags) 727 { 728 BDRVQcowState *s = bs->opaque; 729 int offset_in_cluster; 730 uint64_t cluster_offset; 731 int ret = 0, n; 732 uint8_t *buf; 733 void *orig_buf; 734 735 s->cluster_cache_offset = -1; /* disable compressed cache */ 736 737 /* We must always copy the iov when encrypting, so we 738 * don't modify the original data buffer during encryption */ 739 if (bs->encrypted || qiov->niov > 1) { 740 buf = orig_buf = qemu_try_blockalign(bs, qiov->size); 741 if (buf == NULL) { 742 return -ENOMEM; 743 } 744 qemu_iovec_to_buf(qiov, 0, buf, qiov->size); 745 } else { 746 orig_buf = NULL; 747 buf = (uint8_t *)qiov->iov->iov_base; 748 } 749 750 qemu_co_mutex_lock(&s->lock); 751 752 while (bytes != 0) { 753 offset_in_cluster = offset & (s->cluster_size - 1); 754 n = s->cluster_size - offset_in_cluster; 755 if (n > bytes) { 756 n = bytes; 757 } 758 ret = get_cluster_offset(bs, offset, 1, 0, offset_in_cluster, 759 offset_in_cluster + n, &cluster_offset); 760 if (ret < 0) { 761 break; 762 } 763 if (!cluster_offset || (cluster_offset & 511) != 0) { 764 ret = -EIO; 765 break; 766 } 767 if (bs->encrypted) { 768 assert(s->crypto); 769 if (qcrypto_block_encrypt(s->crypto, offset, buf, n, NULL) < 0) { 770 ret = -EIO; 771 break; 772 } 773 } 774 775 qemu_co_mutex_unlock(&s->lock); 776 BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO); 777 ret = bdrv_co_pwrite(bs->file, cluster_offset + offset_in_cluster, 778 n, buf, 0); 779 qemu_co_mutex_lock(&s->lock); 780 if (ret < 0) { 781 break; 782 } 783 ret = 0; 784 785 bytes -= n; 786 offset += n; 787 buf += n; 788 } 789 qemu_co_mutex_unlock(&s->lock); 790 791 qemu_vfree(orig_buf); 792 793 return ret; 794 } 795 796 static void qcow_close(BlockDriverState *bs) 797 { 798 BDRVQcowState *s = bs->opaque; 799 800 qcrypto_block_free(s->crypto); 801 s->crypto = NULL; 802 g_free(s->l1_table); 803 qemu_vfree(s->l2_cache); 804 g_free(s->cluster_cache); 805 g_free(s->cluster_data); 806 807 migrate_del_blocker(&s->migration_blocker); 808 } 809 810 static int coroutine_fn GRAPH_UNLOCKED 811 qcow_co_create(BlockdevCreateOptions *opts, Error **errp) 812 { 813 BlockdevCreateOptionsQcow *qcow_opts; 814 int header_size, backing_filename_len, l1_size, shift, i; 815 QCowHeader header; 816 uint8_t *tmp; 817 int64_t total_size = 0; 818 int ret; 819 BlockDriverState *bs; 820 BlockBackend *qcow_blk; 821 QCryptoBlock *crypto = NULL; 822 823 assert(opts->driver == BLOCKDEV_DRIVER_QCOW); 824 qcow_opts = &opts->u.qcow; 825 826 /* Sanity checks */ 827 total_size = qcow_opts->size; 828 if (total_size == 0) { 829 error_setg(errp, "Image size is too small, cannot be zero length"); 830 return -EINVAL; 831 } 832 833 if (qcow_opts->encrypt && 834 qcow_opts->encrypt->format != QCRYPTO_BLOCK_FORMAT_QCOW) 835 { 836 error_setg(errp, "Unsupported encryption format"); 837 return -EINVAL; 838 } 839 840 /* Create BlockBackend to write to the image */ 841 bs = bdrv_co_open_blockdev_ref(qcow_opts->file, errp); 842 if (bs == NULL) { 843 return -EIO; 844 } 845 846 qcow_blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, 847 BLK_PERM_ALL, errp); 848 if (!qcow_blk) { 849 ret = -EPERM; 850 goto exit; 851 } 852 blk_set_allow_write_beyond_eof(qcow_blk, true); 853 854 /* Create image format */ 855 memset(&header, 0, sizeof(header)); 856 header.magic = cpu_to_be32(QCOW_MAGIC); 857 header.version = cpu_to_be32(QCOW_VERSION); 858 header.size = cpu_to_be64(total_size); 859 header_size = sizeof(header); 860 backing_filename_len = 0; 861 if (qcow_opts->backing_file) { 862 if (strcmp(qcow_opts->backing_file, "fat:")) { 863 header.backing_file_offset = cpu_to_be64(header_size); 864 backing_filename_len = strlen(qcow_opts->backing_file); 865 header.backing_file_size = cpu_to_be32(backing_filename_len); 866 header_size += backing_filename_len; 867 } else { 868 /* special backing file for vvfat */ 869 qcow_opts->backing_file = NULL; 870 } 871 header.cluster_bits = 9; /* 512 byte cluster to avoid copying 872 unmodified sectors */ 873 header.l2_bits = 12; /* 32 KB L2 tables */ 874 } else { 875 header.cluster_bits = 12; /* 4 KB clusters */ 876 header.l2_bits = 9; /* 4 KB L2 tables */ 877 } 878 header_size = (header_size + 7) & ~7; 879 shift = header.cluster_bits + header.l2_bits; 880 l1_size = (total_size + (1LL << shift) - 1) >> shift; 881 882 header.l1_table_offset = cpu_to_be64(header_size); 883 884 if (qcow_opts->encrypt) { 885 header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); 886 887 crypto = qcrypto_block_create(qcow_opts->encrypt, "encrypt.", 888 NULL, NULL, NULL, 0, errp); 889 if (!crypto) { 890 ret = -EINVAL; 891 goto exit; 892 } 893 } else { 894 header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 895 } 896 897 /* write all the data */ 898 ret = blk_co_pwrite(qcow_blk, 0, sizeof(header), &header, 0); 899 if (ret < 0) { 900 goto exit; 901 } 902 903 if (qcow_opts->backing_file) { 904 ret = blk_co_pwrite(qcow_blk, sizeof(header), backing_filename_len, 905 qcow_opts->backing_file, 0); 906 if (ret < 0) { 907 goto exit; 908 } 909 } 910 911 tmp = g_malloc0(BDRV_SECTOR_SIZE); 912 for (i = 0; i < DIV_ROUND_UP(sizeof(uint64_t) * l1_size, BDRV_SECTOR_SIZE); 913 i++) { 914 ret = blk_co_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i, 915 BDRV_SECTOR_SIZE, tmp, 0); 916 if (ret < 0) { 917 g_free(tmp); 918 goto exit; 919 } 920 } 921 922 g_free(tmp); 923 ret = 0; 924 exit: 925 blk_co_unref(qcow_blk); 926 bdrv_co_unref(bs); 927 qcrypto_block_free(crypto); 928 return ret; 929 } 930 931 static int coroutine_fn GRAPH_UNLOCKED 932 qcow_co_create_opts(BlockDriver *drv, const char *filename, 933 QemuOpts *opts, Error **errp) 934 { 935 BlockdevCreateOptions *create_options = NULL; 936 BlockDriverState *bs = NULL; 937 QDict *qdict = NULL; 938 Visitor *v; 939 const char *val; 940 int ret; 941 char *backing_fmt; 942 943 static const QDictRenames opt_renames[] = { 944 { BLOCK_OPT_BACKING_FILE, "backing-file" }, 945 { BLOCK_OPT_ENCRYPT, BLOCK_OPT_ENCRYPT_FORMAT }, 946 { NULL, NULL }, 947 }; 948 949 /* 950 * We can't actually store a backing format, but can check that 951 * the user's request made sense. 952 */ 953 backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT); 954 if (backing_fmt && !bdrv_find_format(backing_fmt)) { 955 error_setg(errp, "unrecognized backing format '%s'", backing_fmt); 956 ret = -EINVAL; 957 goto fail; 958 } 959 960 /* Parse options and convert legacy syntax */ 961 qdict = qemu_opts_to_qdict_filtered(opts, NULL, &qcow_create_opts, true); 962 963 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT); 964 if (val && !strcmp(val, "on")) { 965 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT, "qcow"); 966 } else if (val && !strcmp(val, "off")) { 967 qdict_del(qdict, BLOCK_OPT_ENCRYPT); 968 } 969 970 val = qdict_get_try_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT); 971 if (val && !strcmp(val, "aes")) { 972 qdict_put_str(qdict, BLOCK_OPT_ENCRYPT_FORMAT, "qcow"); 973 } 974 975 if (!qdict_rename_keys(qdict, opt_renames, errp)) { 976 ret = -EINVAL; 977 goto fail; 978 } 979 980 /* Create and open the file (protocol layer) */ 981 ret = bdrv_co_create_file(filename, opts, errp); 982 if (ret < 0) { 983 goto fail; 984 } 985 986 bs = bdrv_co_open(filename, NULL, NULL, 987 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); 988 if (bs == NULL) { 989 ret = -EIO; 990 goto fail; 991 } 992 993 /* Now get the QAPI type BlockdevCreateOptions */ 994 qdict_put_str(qdict, "driver", "qcow"); 995 qdict_put_str(qdict, "file", bs->node_name); 996 997 v = qobject_input_visitor_new_flat_confused(qdict, errp); 998 if (!v) { 999 ret = -EINVAL; 1000 goto fail; 1001 } 1002 1003 visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); 1004 visit_free(v); 1005 if (!create_options) { 1006 ret = -EINVAL; 1007 goto fail; 1008 } 1009 1010 /* Silently round up size */ 1011 assert(create_options->driver == BLOCKDEV_DRIVER_QCOW); 1012 create_options->u.qcow.size = 1013 ROUND_UP(create_options->u.qcow.size, BDRV_SECTOR_SIZE); 1014 1015 /* Create the qcow image (format layer) */ 1016 ret = qcow_co_create(create_options, errp); 1017 if (ret < 0) { 1018 goto fail; 1019 } 1020 1021 ret = 0; 1022 fail: 1023 g_free(backing_fmt); 1024 qobject_unref(qdict); 1025 bdrv_co_unref(bs); 1026 qapi_free_BlockdevCreateOptions(create_options); 1027 return ret; 1028 } 1029 1030 static int GRAPH_RDLOCK qcow_make_empty(BlockDriverState *bs) 1031 { 1032 BDRVQcowState *s = bs->opaque; 1033 uint32_t l1_length = s->l1_size * sizeof(uint64_t); 1034 int ret; 1035 1036 memset(s->l1_table, 0, l1_length); 1037 if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, l1_length, s->l1_table, 1038 0) < 0) 1039 return -1; 1040 ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false, 1041 PREALLOC_MODE_OFF, 0, NULL); 1042 if (ret < 0) 1043 return ret; 1044 1045 memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); 1046 memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); 1047 memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); 1048 1049 return 0; 1050 } 1051 1052 /* XXX: put compressed sectors first, then all the cluster aligned 1053 tables to avoid losing bytes in alignment */ 1054 static int coroutine_fn GRAPH_RDLOCK 1055 qcow_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes, 1056 QEMUIOVector *qiov) 1057 { 1058 BDRVQcowState *s = bs->opaque; 1059 z_stream strm; 1060 int ret, out_len; 1061 uint8_t *buf, *out_buf; 1062 uint64_t cluster_offset; 1063 1064 buf = qemu_blockalign(bs, s->cluster_size); 1065 if (bytes != s->cluster_size) { 1066 if (bytes > s->cluster_size || 1067 offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS) 1068 { 1069 qemu_vfree(buf); 1070 return -EINVAL; 1071 } 1072 /* Zero-pad last write if image size is not cluster aligned */ 1073 memset(buf + bytes, 0, s->cluster_size - bytes); 1074 } 1075 qemu_iovec_to_buf(qiov, 0, buf, qiov->size); 1076 1077 out_buf = g_malloc(s->cluster_size); 1078 1079 /* best compression, small window, no zlib header */ 1080 memset(&strm, 0, sizeof(strm)); 1081 ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, 1082 Z_DEFLATED, -12, 1083 9, Z_DEFAULT_STRATEGY); 1084 if (ret != 0) { 1085 ret = -EINVAL; 1086 goto fail; 1087 } 1088 1089 strm.avail_in = s->cluster_size; 1090 strm.next_in = (uint8_t *)buf; 1091 strm.avail_out = s->cluster_size; 1092 strm.next_out = out_buf; 1093 1094 ret = deflate(&strm, Z_FINISH); 1095 if (ret != Z_STREAM_END && ret != Z_OK) { 1096 deflateEnd(&strm); 1097 ret = -EINVAL; 1098 goto fail; 1099 } 1100 out_len = strm.next_out - out_buf; 1101 1102 deflateEnd(&strm); 1103 1104 if (ret != Z_STREAM_END || out_len >= s->cluster_size) { 1105 /* could not compress: write normal cluster */ 1106 ret = qcow_co_pwritev(bs, offset, bytes, qiov, 0); 1107 if (ret < 0) { 1108 goto fail; 1109 } 1110 goto success; 1111 } 1112 qemu_co_mutex_lock(&s->lock); 1113 ret = get_cluster_offset(bs, offset, 2, out_len, 0, 0, &cluster_offset); 1114 qemu_co_mutex_unlock(&s->lock); 1115 if (ret < 0) { 1116 goto fail; 1117 } 1118 if (cluster_offset == 0) { 1119 ret = -EIO; 1120 goto fail; 1121 } 1122 cluster_offset &= s->cluster_offset_mask; 1123 1124 BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); 1125 ret = bdrv_co_pwrite(bs->file, cluster_offset, out_len, out_buf, 0); 1126 if (ret < 0) { 1127 goto fail; 1128 } 1129 success: 1130 ret = 0; 1131 fail: 1132 qemu_vfree(buf); 1133 g_free(out_buf); 1134 return ret; 1135 } 1136 1137 static int coroutine_fn 1138 qcow_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 1139 { 1140 BDRVQcowState *s = bs->opaque; 1141 bdi->cluster_size = s->cluster_size; 1142 return 0; 1143 } 1144 1145 static QemuOptsList qcow_create_opts = { 1146 .name = "qcow-create-opts", 1147 .head = QTAILQ_HEAD_INITIALIZER(qcow_create_opts.head), 1148 .desc = { 1149 { 1150 .name = BLOCK_OPT_SIZE, 1151 .type = QEMU_OPT_SIZE, 1152 .help = "Virtual disk size" 1153 }, 1154 { 1155 .name = BLOCK_OPT_BACKING_FILE, 1156 .type = QEMU_OPT_STRING, 1157 .help = "File name of a base image" 1158 }, 1159 { 1160 .name = BLOCK_OPT_BACKING_FMT, 1161 .type = QEMU_OPT_STRING, 1162 .help = "Format of the backing image", 1163 }, 1164 { 1165 .name = BLOCK_OPT_ENCRYPT, 1166 .type = QEMU_OPT_BOOL, 1167 .help = "Encrypt the image with format 'aes'. (Deprecated " 1168 "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)", 1169 }, 1170 { 1171 .name = BLOCK_OPT_ENCRYPT_FORMAT, 1172 .type = QEMU_OPT_STRING, 1173 .help = "Encrypt the image, format choices: 'aes'", 1174 }, 1175 BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET("encrypt."), 1176 { /* end of list */ } 1177 } 1178 }; 1179 1180 static const char *const qcow_strong_runtime_opts[] = { 1181 "encrypt." BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET, 1182 1183 NULL 1184 }; 1185 1186 static BlockDriver bdrv_qcow = { 1187 .format_name = "qcow", 1188 .instance_size = sizeof(BDRVQcowState), 1189 .bdrv_probe = qcow_probe, 1190 .bdrv_open = qcow_open, 1191 .bdrv_close = qcow_close, 1192 .bdrv_child_perm = bdrv_default_perms, 1193 .bdrv_reopen_prepare = qcow_reopen_prepare, 1194 .bdrv_co_create = qcow_co_create, 1195 .bdrv_co_create_opts = qcow_co_create_opts, 1196 .bdrv_has_zero_init = bdrv_has_zero_init_1, 1197 .is_format = true, 1198 .supports_backing = true, 1199 .bdrv_refresh_limits = qcow_refresh_limits, 1200 1201 .bdrv_co_preadv = qcow_co_preadv, 1202 .bdrv_co_pwritev = qcow_co_pwritev, 1203 .bdrv_co_block_status = qcow_co_block_status, 1204 1205 .bdrv_make_empty = qcow_make_empty, 1206 .bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed, 1207 .bdrv_co_get_info = qcow_co_get_info, 1208 1209 .create_opts = &qcow_create_opts, 1210 .strong_runtime_opts = qcow_strong_runtime_opts, 1211 }; 1212 1213 static void bdrv_qcow_init(void) 1214 { 1215 bdrv_register(&bdrv_qcow); 1216 } 1217 1218 block_init(bdrv_qcow_init); 1219