/*
 * Block driver for the QCOW format
 *
 * Copyright (c) 2004-2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include <zlib.h>
#include "qapi/qmp/qerror.h"
#include "crypto/cipher.h"
#include "migration/migration.h"

/**************************************************************/
/* QEMU COW block driver with compression and encryption support */

#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
#define QCOW_VERSION 1

#define QCOW_CRYPT_NONE 0
#define QCOW_CRYPT_AES  1

#define QCOW_OFLAG_COMPRESSED (1LL << 63)

typedef struct QCowHeader {
    uint32_t magic;
    uint32_t version;
    uint64_t backing_file_offset;
    uint32_t backing_file_size;
    uint32_t mtime;
    uint64_t size; /* in bytes */
    uint8_t cluster_bits;
    uint8_t l2_bits;
    uint16_t padding;
    uint32_t crypt_method;
    uint64_t l1_table_offset;
} QEMU_PACKED QCowHeader;

#define L2_CACHE_SIZE 16

typedef struct BDRVQcowState {
    int cluster_bits;
    int cluster_size;
    int cluster_sectors;
    int l2_bits;
    int l2_size;
    unsigned int l1_size;
    uint64_t cluster_offset_mask;
    uint64_t l1_table_offset;
    uint64_t *l1_table;
    uint64_t *l2_cache;
    uint64_t l2_cache_offsets[L2_CACHE_SIZE];
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
    uint8_t *cluster_cache;
    uint8_t *cluster_data;
    uint64_t cluster_cache_offset;
    QCryptoCipher *cipher; /* NULL if no key yet */
    uint32_t crypt_method_header;
    CoMutex lock;
    Error *migration_blocker;
} BDRVQcowState;

static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);

static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    const QCowHeader *cow_header = (const void *)buf;

    if (buf_size >= sizeof(QCowHeader) &&
        be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
        be32_to_cpu(cow_header->version) == QCOW_VERSION)
        return 100;
    else
        return 0;
}

static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
{
    BDRVQcowState *s = bs->opaque;
    unsigned int len, i, shift;
    int ret;
    QCowHeader header;

    ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
    if (ret < 0) {
        goto fail;
    }
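    /* all multi-byte header fields are big-endian on disk; convert
     * them to host byte order before validating anything */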
    be32_to_cpus(&header.magic);
    be32_to_cpus(&header.version);
    be64_to_cpus(&header.backing_file_offset);
    be32_to_cpus(&header.backing_file_size);
    be32_to_cpus(&header.mtime);
    be64_to_cpus(&header.size);
    be32_to_cpus(&header.crypt_method);
    be64_to_cpus(&header.l1_table_offset);

    if (header.magic != QCOW_MAGIC) {
        error_setg(errp, "Image not in qcow format");
        ret = -EINVAL;
        goto fail;
    }
    if (header.version != QCOW_VERSION) {
        char version[64];
        snprintf(version, sizeof(version), "QCOW version %" PRIu32,
                 header.version);
        error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
                   bdrv_get_device_or_node_name(bs), "qcow", version);
        ret = -ENOTSUP;
        goto fail;
    }

    if (header.size <= 1) {
        error_setg(errp, "Image size is too small (must be at least 2 bytes)");
        ret = -EINVAL;
        goto fail;
    }
    if (header.cluster_bits < 9 || header.cluster_bits > 16) {
        error_setg(errp, "Cluster size must be between 512 and 64k");
        ret = -EINVAL;
        goto fail;
    }

    /* l2_bits specifies number of entries; storing a uint64_t in each entry,
     * so bytes = num_entries << 3. */
    if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3) {
        error_setg(errp, "L2 table size must be between 512 and 64k");
        ret = -EINVAL;
        goto fail;
    }

    if (header.crypt_method > QCOW_CRYPT_AES) {
        error_setg(errp, "invalid encryption method in qcow header");
        ret = -EINVAL;
        goto fail;
    }
    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) {
        error_setg(errp, "AES cipher not available");
        ret = -EINVAL;
        goto fail;
    }
    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
        bs->encrypted = 1;
    }
    s->cluster_bits = header.cluster_bits;
    s->cluster_size = 1 << s->cluster_bits;
    s->cluster_sectors = 1 << (s->cluster_bits - 9);
    s->l2_bits = header.l2_bits;
    s->l2_size = 1 << s->l2_bits;
    bs->total_sectors = header.size / 512;
    s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;

    /* read the level 1 table */
    shift = s->cluster_bits + s->l2_bits;
    if (header.size > UINT64_MAX - (1LL << shift)) {
        error_setg(errp, "Image too large");
        ret = -EINVAL;
        goto fail;
    } else {
        uint64_t l1_size = (header.size + (1LL << shift) - 1) >> shift;
        if (l1_size > INT_MAX / sizeof(uint64_t)) {
            error_setg(errp, "Image too large");
            ret = -EINVAL;
            goto fail;
        }
        s->l1_size = l1_size;
    }

    s->l1_table_offset = header.l1_table_offset;
    s->l1_table = g_try_new(uint64_t, s->l1_size);
    if (s->l1_table == NULL) {
        error_setg(errp, "Could not allocate memory for L1 table");
        ret = -ENOMEM;
        goto fail;
    }

    ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
                     s->l1_size * sizeof(uint64_t));
    if (ret < 0) {
        goto fail;
    }

    for (i = 0; i < s->l1_size; i++) {
        be64_to_cpus(&s->l1_table[i]);
    }
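
    /* The L2 cache allocated below holds complete L2 tables; on a miss,
     * get_cluster_offset() evicts the entry with the lowest hit count */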
    /* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
    s->l2_cache =
        qemu_try_blockalign(bs->file->bs,
                            s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
    if (s->l2_cache == NULL) {
        error_setg(errp, "Could not allocate L2 table cache");
        ret = -ENOMEM;
        goto fail;
    }
    s->cluster_cache = g_malloc(s->cluster_size);
    s->cluster_data = g_malloc(s->cluster_size);
    s->cluster_cache_offset = -1;

    /* read the backing file name */
    if (header.backing_file_offset != 0) {
        len = header.backing_file_size;
        if (len > 1023 || len >= sizeof(bs->backing_file)) {
            error_setg(errp, "Backing file name too long");
            ret = -EINVAL;
            goto fail;
        }
        ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
                         bs->backing_file, len);
        if (ret < 0) {
            goto fail;
        }
        bs->backing_file[len] = '\0';
    }

    /* Disable migration when qcow images are used */
    error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
    migrate_add_blocker(s->migration_blocker);

    qemu_co_mutex_init(&s->lock);
    return 0;

fail:
    g_free(s->l1_table);
    qemu_vfree(s->l2_cache);
    g_free(s->cluster_cache);
    g_free(s->cluster_data);
    return ret;
}


/* We have nothing to do for QCOW reopen, stubs just return
 * success */
static int qcow_reopen_prepare(BDRVReopenState *state,
                               BlockReopenQueue *queue, Error **errp)
{
    return 0;
}

static int qcow_set_key(BlockDriverState *bs, const char *key)
{
    BDRVQcowState *s = bs->opaque;
    uint8_t keybuf[16];
    int len, i;
    Error *err = NULL;

    memset(keybuf, 0, 16);
    len = strlen(key);
    if (len > 16)
        len = 16;
    /* XXX: we could compress the chars to 7 bits to increase
       entropy */
    for (i = 0; i < len; i++) {
        keybuf[i] = key[i];
    }
    assert(bs->encrypted);

    qcrypto_cipher_free(s->cipher);
    s->cipher = qcrypto_cipher_new(
        QCRYPTO_CIPHER_ALG_AES_128,
        QCRYPTO_CIPHER_MODE_CBC,
        keybuf, G_N_ELEMENTS(keybuf),
        &err);

    if (!s->cipher) {
        /* XXX it would be nice if errors in this method could
         * be properly propagated to the caller. Would need
         * the bdrv_set_key() API signature to be fixed. */
        error_free(err);
        return -1;
    }
    return 0;
}

/* The crypt function is compatible with the linux cryptoloop
   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
   supported */
static int encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
                           uint8_t *out_buf, const uint8_t *in_buf,
                           int nb_sectors, bool enc, Error **errp)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } ivec;
    int i;
    int ret;

    for (i = 0; i < nb_sectors; i++) {
        /* the IV is simply the little-endian sector number,
         * zero-padded to 16 bytes */
        ivec.ll[0] = cpu_to_le64(sector_num);
        ivec.ll[1] = 0;
        if (qcrypto_cipher_setiv(s->cipher,
                                 ivec.b, G_N_ELEMENTS(ivec.b),
                                 errp) < 0) {
            return -1;
        }
        if (enc) {
            ret = qcrypto_cipher_encrypt(s->cipher,
                                         in_buf,
                                         out_buf,
                                         512,
                                         errp);
        } else {
            ret = qcrypto_cipher_decrypt(s->cipher,
                                         in_buf,
                                         out_buf,
                                         512,
                                         errp);
        }
        if (ret < 0) {
            return -1;
        }
        sector_num++;
        in_buf += 512;
        out_buf += 512;
    }
    return 0;
}
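
/* A guest offset is mapped to a host cluster via a two-level table
 * walk, implemented in get_cluster_offset() below. For example, with
 * the defaults used for images without a backing file
 * (cluster_bits = 12, l2_bits = 9):
 *   l1_index = offset >> (9 + 12);
 *   l2_index = (offset >> 12) & ((1 << 9) - 1);
 * and the low 12 bits of the offset address a byte inside the
 * cluster. */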
/* 'allocate' is:
 *
 * 0 to not allocate.
 *
 * 1 to allocate a normal cluster (for sector indexes 'n_start' to
 *   'n_end')
 *
 * 2 to allocate a compressed cluster of size 'compressed_size'.
 *   'compressed_size' must be > 0 and < cluster_size
 *
 * return 0 if not allocated.
 */
static uint64_t get_cluster_offset(BlockDriverState *bs,
                                   uint64_t offset, int allocate,
                                   int compressed_size,
                                   int n_start, int n_end)
{
    BDRVQcowState *s = bs->opaque;
    int min_index, i, j, l1_index, l2_index;
    uint64_t l2_offset, *l2_table, cluster_offset, tmp;
    uint32_t min_count;
    int new_l2_table;

    l1_index = offset >> (s->l2_bits + s->cluster_bits);
    l2_offset = s->l1_table[l1_index];
    new_l2_table = 0;
    if (!l2_offset) {
        if (!allocate)
            return 0;
        /* allocate a new l2 entry */
        l2_offset = bdrv_getlength(bs->file->bs);
        /* round to cluster size */
        l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
        /* update the L1 entry */
        s->l1_table[l1_index] = l2_offset;
        tmp = cpu_to_be64(l2_offset);
        if (bdrv_pwrite_sync(bs->file->bs,
                             s->l1_table_offset + l1_index * sizeof(tmp),
                             &tmp, sizeof(tmp)) < 0)
            return 0;
        new_l2_table = 1;
    }
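    /* look for the L2 table in the cache; on a miss, evict the entry
     * with the lowest hit count and load (or initialize) the table
     * from the image file */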
    for (i = 0; i < L2_CACHE_SIZE; i++) {
        if (l2_offset == s->l2_cache_offsets[i]) {
            /* increment the hit count */
            if (++s->l2_cache_counts[i] == 0xffffffff) {
                for (j = 0; j < L2_CACHE_SIZE; j++) {
                    s->l2_cache_counts[j] >>= 1;
                }
            }
            l2_table = s->l2_cache + (i << s->l2_bits);
            goto found;
        }
    }
    /* not found: load a new entry in the least used one */
    min_index = 0;
    min_count = 0xffffffff;
    for (i = 0; i < L2_CACHE_SIZE; i++) {
        if (s->l2_cache_counts[i] < min_count) {
            min_count = s->l2_cache_counts[i];
            min_index = i;
        }
    }
    l2_table = s->l2_cache + (min_index << s->l2_bits);
    if (new_l2_table) {
        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
        if (bdrv_pwrite_sync(bs->file->bs, l2_offset, l2_table,
                             s->l2_size * sizeof(uint64_t)) < 0)
            return 0;
    } else {
        if (bdrv_pread(bs->file->bs, l2_offset, l2_table,
                       s->l2_size * sizeof(uint64_t)) !=
            s->l2_size * sizeof(uint64_t))
            return 0;
    }
    s->l2_cache_offsets[min_index] = l2_offset;
    s->l2_cache_counts[min_index] = 1;
found:
    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
    cluster_offset = be64_to_cpu(l2_table[l2_index]);
    if (!cluster_offset ||
        ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
        if (!allocate)
            return 0;
        /* allocate a new cluster */
        if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
            (n_end - n_start) < s->cluster_sectors) {
            /* if the cluster is already compressed, we must
               decompress it in case it is not completely
               overwritten */
            if (decompress_cluster(bs, cluster_offset) < 0)
                return 0;
            cluster_offset = bdrv_getlength(bs->file->bs);
            cluster_offset = (cluster_offset + s->cluster_size - 1) &
                ~(s->cluster_size - 1);
            /* write the cluster content */
            if (bdrv_pwrite(bs->file->bs, cluster_offset, s->cluster_cache,
                            s->cluster_size) !=
                s->cluster_size)
                return -1;
        } else {
            cluster_offset = bdrv_getlength(bs->file->bs);
            if (allocate == 1) {
                /* round to cluster size */
                cluster_offset = (cluster_offset + s->cluster_size - 1) &
                    ~(s->cluster_size - 1);
                bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
                if (bs->encrypted &&
                    (n_end - n_start) < s->cluster_sectors) {
                    uint64_t start_sect;
                    assert(s->cipher);
                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
                    memset(s->cluster_data + 512, 0x00, 512);
                    for (i = 0; i < s->cluster_sectors; i++) {
                        if (i < n_start || i >= n_end) {
                            Error *err = NULL;
                            if (encrypt_sectors(s, start_sect + i,
                                                s->cluster_data,
                                                s->cluster_data + 512, 1,
                                                true, &err) < 0) {
                                error_free(err);
                                errno = EIO;
                                return -1;
                            }
                            if (bdrv_pwrite(bs->file->bs,
                                            cluster_offset + i * 512,
                                            s->cluster_data, 512) != 512)
                                return -1;
                        }
                    }
                }
            } else if (allocate == 2) {
                cluster_offset |= QCOW_OFLAG_COMPRESSED |
                    (uint64_t)compressed_size << (63 - s->cluster_bits);
            }
        }
        /* update L2 table */
        tmp = cpu_to_be64(cluster_offset);
        l2_table[l2_index] = tmp;
        if (bdrv_pwrite_sync(bs->file->bs, l2_offset + l2_index * sizeof(tmp),
                             &tmp, sizeof(tmp)) < 0)
            return 0;
    }
    return cluster_offset;
}

static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, int *pnum)
{
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster, n;
    uint64_t cluster_offset;

    qemu_co_mutex_lock(&s->lock);
    cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
    qemu_co_mutex_unlock(&s->lock);
    index_in_cluster = sector_num & (s->cluster_sectors - 1);
    n = s->cluster_sectors - index_in_cluster;
    if (n > nb_sectors)
        n = nb_sectors;
    *pnum = n;
    if (!cluster_offset) {
        return 0;
    }
    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->cipher) {
        return BDRV_BLOCK_DATA;
    }
    cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
    return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
}

static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
                             const uint8_t *buf, int buf_size)
{
    z_stream strm1, *strm = &strm1;
    int ret, out_len;

    memset(strm, 0, sizeof(*strm));

    strm->next_in = (uint8_t *)buf;
    strm->avail_in = buf_size;
    strm->next_out = out_buf;
    strm->avail_out = out_buf_size;

    /* raw deflate stream with a 4 KB window (wbits = -12), matching
     * what qcow_write_compressed() produces */
    ret = inflateInit2(strm, -12);
    if (ret != Z_OK)
        return -1;
    ret = inflate(strm, Z_FINISH);
    out_len = strm->next_out - out_buf;
    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
        out_len != out_buf_size) {
        inflateEnd(strm);
        return -1;
    }
    inflateEnd(strm);
    return 0;
}

static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
{
    BDRVQcowState *s = bs->opaque;
    int ret, csize;
    uint64_t coffset;

    coffset = cluster_offset & s->cluster_offset_mask;
    if (s->cluster_cache_offset != coffset) {
        csize = cluster_offset >> (63 - s->cluster_bits);
        csize &= (s->cluster_size - 1);
        ret = bdrv_pread(bs->file->bs, coffset, s->cluster_data, csize);
        if (ret != csize)
            return -1;
        if (decompress_buffer(s->cluster_cache, s->cluster_size,
                              s->cluster_data, csize) < 0) {
            return -1;
        }
        s->cluster_cache_offset = coffset;
    }
    return 0;
}

static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *qiov)
{
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster;
    int ret = 0, n;
    uint64_t cluster_offset;
    struct iovec hd_iov;
    QEMUIOVector hd_qiov;
    uint8_t *buf;
    void *orig_buf;
    Error *err = NULL;

    if (qiov->niov > 1) {
        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
        if (buf == NULL) {
            return -ENOMEM;
        }
    } else {
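        /* a single iovec: read directly into the caller's buffer */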
        orig_buf = NULL;
        buf = (uint8_t *)qiov->iov->iov_base;
    }

    qemu_co_mutex_lock(&s->lock);

    while (nb_sectors != 0) {
        /* prepare next request */
        cluster_offset = get_cluster_offset(bs, sector_num << 9,
                                            0, 0, 0, 0);
        index_in_cluster = sector_num & (s->cluster_sectors - 1);
        n = s->cluster_sectors - index_in_cluster;
        if (n > nb_sectors) {
            n = nb_sectors;
        }

        if (!cluster_offset) {
            if (bs->backing) {
                /* read from the base image */
                hd_iov.iov_base = (void *)buf;
                hd_iov.iov_len = n * 512;
                qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
                qemu_co_mutex_unlock(&s->lock);
                ret = bdrv_co_readv(bs->backing->bs, sector_num,
                                    n, &hd_qiov);
                qemu_co_mutex_lock(&s->lock);
                if (ret < 0) {
                    goto fail;
                }
            } else {
                /* Note: in this case, no need to wait */
                memset(buf, 0, 512 * n);
            }
        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
            /* add AIO support for compressed blocks ? */
            if (decompress_cluster(bs, cluster_offset) < 0) {
                goto fail;
            }
            memcpy(buf,
                   s->cluster_cache + index_in_cluster * 512, 512 * n);
        } else {
            if ((cluster_offset & 511) != 0) {
                goto fail;
            }
            hd_iov.iov_base = (void *)buf;
            hd_iov.iov_len = n * 512;
            qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
            qemu_co_mutex_unlock(&s->lock);
            ret = bdrv_co_readv(bs->file->bs,
                                (cluster_offset >> 9) + index_in_cluster,
                                n, &hd_qiov);
            qemu_co_mutex_lock(&s->lock);
            if (ret < 0) {
                break;
            }
            if (bs->encrypted) {
                assert(s->cipher);
                if (encrypt_sectors(s, sector_num, buf, buf,
                                    n, false, &err) < 0) {
                    goto fail;
                }
            }
        }
        ret = 0;

        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;
    }

done:
    qemu_co_mutex_unlock(&s->lock);

    if (qiov->niov > 1) {
        qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size);
        qemu_vfree(orig_buf);
    }

    return ret;

fail:
    error_free(err);
    ret = -EIO;
    goto done;
}

static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
                                       int nb_sectors, QEMUIOVector *qiov)
{
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster;
    uint64_t cluster_offset;
    const uint8_t *src_buf;
    int ret = 0, n;
    uint8_t *cluster_data = NULL;
    struct iovec hd_iov;
    QEMUIOVector hd_qiov;
    uint8_t *buf;
    void *orig_buf;

    s->cluster_cache_offset = -1; /* disable compressed cache */

    if (qiov->niov > 1) {
        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
        if (buf == NULL) {
            return -ENOMEM;
        }
        qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
    } else {
        orig_buf = NULL;
        buf = (uint8_t *)qiov->iov->iov_base;
    }

    qemu_co_mutex_lock(&s->lock);

    while (nb_sectors != 0) {

        index_in_cluster = sector_num & (s->cluster_sectors - 1);
        n = s->cluster_sectors - index_in_cluster;
        if (n > nb_sectors) {
            n = nb_sectors;
        }
        cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
                                            index_in_cluster,
                                            index_in_cluster + n);
        if (!cluster_offset || (cluster_offset & 511) != 0) {
            ret = -EIO;
            break;
        }
        if (bs->encrypted) {
            Error *err = NULL;
            assert(s->cipher);
            if (!cluster_data) {
                cluster_data = g_malloc0(s->cluster_size);
            }
            if (encrypt_sectors(s, sector_num, cluster_data, buf,
                                n, true, &err) < 0) {
                error_free(err);
                ret = -EIO;
                break;
            }
            src_buf = cluster_data;
        } else {
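            /* unencrypted: write the guest data as-is */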
            src_buf = buf;
        }

        hd_iov.iov_base = (void *)src_buf;
        hd_iov.iov_len = n * 512;
        qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
        qemu_co_mutex_unlock(&s->lock);
        ret = bdrv_co_writev(bs->file->bs,
                             (cluster_offset >> 9) + index_in_cluster,
                             n, &hd_qiov);
        qemu_co_mutex_lock(&s->lock);
        if (ret < 0) {
            break;
        }
        ret = 0;

        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;
    }
    qemu_co_mutex_unlock(&s->lock);

    if (qiov->niov > 1) {
        qemu_vfree(orig_buf);
    }
    g_free(cluster_data);

    return ret;
}

static void qcow_close(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;

    qcrypto_cipher_free(s->cipher);
    s->cipher = NULL;
    g_free(s->l1_table);
    qemu_vfree(s->l2_cache);
    g_free(s->cluster_cache);
    g_free(s->cluster_data);

    migrate_del_blocker(s->migration_blocker);
    error_free(s->migration_blocker);
}

static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
{
    int header_size, backing_filename_len, l1_size, shift, i;
    QCowHeader header;
    uint8_t *tmp;
    int64_t total_size = 0;
    char *backing_file = NULL;
    int flags = 0;
    Error *local_err = NULL;
    int ret;
    BlockDriverState *qcow_bs;

    /* Read out options */
    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                          BDRV_SECTOR_SIZE);
    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
        flags |= BLOCK_FLAG_ENCRYPT;
    }

    ret = bdrv_create_file(filename, opts, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto cleanup;
    }

    qcow_bs = NULL;
    ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
                    BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto cleanup;
    }

    ret = bdrv_truncate(qcow_bs, 0);
    if (ret < 0) {
        goto exit;
    }

    memset(&header, 0, sizeof(header));
    header.magic = cpu_to_be32(QCOW_MAGIC);
    header.version = cpu_to_be32(QCOW_VERSION);
    header.size = cpu_to_be64(total_size);
    header_size = sizeof(header);
    backing_filename_len = 0;
    if (backing_file) {
        if (strcmp(backing_file, "fat:")) {
            header.backing_file_offset = cpu_to_be64(header_size);
            backing_filename_len = strlen(backing_file);
            header.backing_file_size = cpu_to_be32(backing_filename_len);
            header_size += backing_filename_len;
        } else {
            /* special backing file for vvfat; the string was allocated
             * by qemu_opt_get_del(), so free it before dropping it */
            g_free(backing_file);
            backing_file = NULL;
        }
        header.cluster_bits = 9; /* 512 byte cluster to avoid copying
                                    unmodified sectors */
        header.l2_bits = 12; /* 32 KB L2 tables */
    } else {
        header.cluster_bits = 12; /* 4 KB clusters */
        header.l2_bits = 9; /* 4 KB L2 tables */
    }
    header_size = (header_size + 7) & ~7;
    shift = header.cluster_bits + header.l2_bits;
    l1_size = (total_size + (1LL << shift) - 1) >> shift;

    header.l1_table_offset = cpu_to_be64(header_size);
    if (flags & BLOCK_FLAG_ENCRYPT) {
        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
    } else {
        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
    }

    /* write all the data */
    ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
    if (ret != sizeof(header)) {
        goto exit;
    }

    if (backing_file) {
        ret = bdrv_pwrite(qcow_bs, sizeof(header),
                          backing_file, backing_filename_len);
        if (ret != backing_filename_len) {
            goto exit;
        }
    }
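
    /* write the initial (all-zero) L1 table, one sector at a time */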
    tmp = g_malloc0(BDRV_SECTOR_SIZE);
    for (i = 0; i < ((sizeof(uint64_t) * l1_size + BDRV_SECTOR_SIZE - 1) /
                     BDRV_SECTOR_SIZE); i++) {
        ret = bdrv_pwrite(qcow_bs, header_size +
                          BDRV_SECTOR_SIZE * i, tmp, BDRV_SECTOR_SIZE);
        if (ret != BDRV_SECTOR_SIZE) {
            g_free(tmp);
            goto exit;
        }
    }

    g_free(tmp);
    ret = 0;
exit:
    bdrv_unref(qcow_bs);
cleanup:
    g_free(backing_file);
    return ret;
}

static int qcow_make_empty(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    uint32_t l1_length = s->l1_size * sizeof(uint64_t);
    int ret;

    memset(s->l1_table, 0, l1_length);
    if (bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, s->l1_table,
                         l1_length) < 0)
        return -1;
    ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
    if (ret < 0)
        return ret;

    memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
    memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
    memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));

    return 0;
}

/* XXX: put compressed sectors first, then all the cluster aligned
   tables to avoid losing bytes in alignment */
static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
                                 const uint8_t *buf, int nb_sectors)
{
    BDRVQcowState *s = bs->opaque;
    z_stream strm;
    int ret, out_len;
    uint8_t *out_buf;
    uint64_t cluster_offset;

    if (nb_sectors != s->cluster_sectors) {
        ret = -EINVAL;

        /* Zero-pad last write if image size is not cluster aligned */
        if (sector_num + nb_sectors == bs->total_sectors &&
            nb_sectors < s->cluster_sectors) {
            uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
            memset(pad_buf, 0, s->cluster_size);
            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
            ret = qcow_write_compressed(bs, sector_num,
                                        pad_buf, s->cluster_sectors);
            qemu_vfree(pad_buf);
        }
        return ret;
    }

    out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);

    /* best compression, small window, no zlib header */
    memset(&strm, 0, sizeof(strm));
    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
                       Z_DEFLATED, -12,
                       9, Z_DEFAULT_STRATEGY);
    if (ret != 0) {
        ret = -EINVAL;
        goto fail;
    }

    strm.avail_in = s->cluster_size;
    strm.next_in = (uint8_t *)buf;
    strm.avail_out = s->cluster_size;
    strm.next_out = out_buf;

    ret = deflate(&strm, Z_FINISH);
    if (ret != Z_STREAM_END && ret != Z_OK) {
        deflateEnd(&strm);
        ret = -EINVAL;
        goto fail;
    }
    out_len = strm.next_out - out_buf;

    deflateEnd(&strm);

    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
        /* could not compress: write normal cluster */
        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
        if (ret < 0) {
            goto fail;
        }
    } else {
        cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
                                            out_len, 0, 0);
        if (cluster_offset == 0) {
            ret = -EIO;
            goto fail;
        }

        cluster_offset &= s->cluster_offset_mask;
        ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
        if (ret < 0) {
            goto fail;
        }
    }

    ret = 0;
fail:
    g_free(out_buf);
    return ret;
}

static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BDRVQcowState *s = bs->opaque;
    bdi->cluster_size = s->cluster_size;
    return 0;
}
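
/* creation options accepted by qcow_create(); the requested size is
 * rounded up to a whole number of 512-byte sectors */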
985 } 986 987 static QemuOptsList qcow_create_opts = { 988 .name = "qcow-create-opts", 989 .head = QTAILQ_HEAD_INITIALIZER(qcow_create_opts.head), 990 .desc = { 991 { 992 .name = BLOCK_OPT_SIZE, 993 .type = QEMU_OPT_SIZE, 994 .help = "Virtual disk size" 995 }, 996 { 997 .name = BLOCK_OPT_BACKING_FILE, 998 .type = QEMU_OPT_STRING, 999 .help = "File name of a base image" 1000 }, 1001 { 1002 .name = BLOCK_OPT_ENCRYPT, 1003 .type = QEMU_OPT_BOOL, 1004 .help = "Encrypt the image", 1005 .def_value_str = "off" 1006 }, 1007 { /* end of list */ } 1008 } 1009 }; 1010 1011 static BlockDriver bdrv_qcow = { 1012 .format_name = "qcow", 1013 .instance_size = sizeof(BDRVQcowState), 1014 .bdrv_probe = qcow_probe, 1015 .bdrv_open = qcow_open, 1016 .bdrv_close = qcow_close, 1017 .bdrv_reopen_prepare = qcow_reopen_prepare, 1018 .bdrv_create = qcow_create, 1019 .bdrv_has_zero_init = bdrv_has_zero_init_1, 1020 .supports_backing = true, 1021 1022 .bdrv_co_readv = qcow_co_readv, 1023 .bdrv_co_writev = qcow_co_writev, 1024 .bdrv_co_get_block_status = qcow_co_get_block_status, 1025 1026 .bdrv_set_key = qcow_set_key, 1027 .bdrv_make_empty = qcow_make_empty, 1028 .bdrv_write_compressed = qcow_write_compressed, 1029 .bdrv_get_info = qcow_get_info, 1030 1031 .create_opts = &qcow_create_opts, 1032 }; 1033 1034 static void bdrv_qcow_init(void) 1035 { 1036 bdrv_register(&bdrv_qcow); 1037 } 1038 1039 block_init(bdrv_qcow_init); 1040