1 /* 2 * Block driver for the QCOW format 3 * 4 * Copyright (c) 2004-2006 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 #include "qemu-common.h" 25 #include "block_int.h" 26 #include "module.h" 27 #include <zlib.h> 28 #include "aes.h" 29 30 /**************************************************************/ 31 /* QEMU COW block driver with compression and encryption support */ 32 33 #define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) 34 #define QCOW_VERSION 1 35 36 #define QCOW_CRYPT_NONE 0 37 #define QCOW_CRYPT_AES 1 38 39 #define QCOW_OFLAG_COMPRESSED (1LL << 63) 40 41 typedef struct QCowHeader { 42 uint32_t magic; 43 uint32_t version; 44 uint64_t backing_file_offset; 45 uint32_t backing_file_size; 46 uint32_t mtime; 47 uint64_t size; /* in bytes */ 48 uint8_t cluster_bits; 49 uint8_t l2_bits; 50 uint32_t crypt_method; 51 uint64_t l1_table_offset; 52 } QCowHeader; 53 54 #define L2_CACHE_SIZE 16 55 56 typedef struct BDRVQcowState { 57 int cluster_bits; 58 int cluster_size; 59 int cluster_sectors; 60 int l2_bits; 61 int l2_size; 62 int l1_size; 63 uint64_t cluster_offset_mask; 64 uint64_t l1_table_offset; 65 uint64_t *l1_table; 66 uint64_t *l2_cache; 67 uint64_t l2_cache_offsets[L2_CACHE_SIZE]; 68 uint32_t l2_cache_counts[L2_CACHE_SIZE]; 69 uint8_t *cluster_cache; 70 uint8_t *cluster_data; 71 uint64_t cluster_cache_offset; 72 uint32_t crypt_method; /* current crypt method, 0 if no key yet */ 73 uint32_t crypt_method_header; 74 AES_KEY aes_encrypt_key; 75 AES_KEY aes_decrypt_key; 76 } BDRVQcowState; 77 78 static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); 79 80 static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) 81 { 82 const QCowHeader *cow_header = (const void *)buf; 83 84 if (buf_size >= sizeof(QCowHeader) && 85 be32_to_cpu(cow_header->magic) == QCOW_MAGIC && 86 be32_to_cpu(cow_header->version) == QCOW_VERSION) 87 return 100; 88 else 89 return 0; 90 } 91 92 static int qcow_open(BlockDriverState *bs, int flags) 93 { 94 BDRVQcowState *s = bs->opaque; 95 int len, i, shift; 96 QCowHeader header; 97 98 if (bdrv_pread(bs->file, 0, &header, sizeof(header)) != sizeof(header)) 99 goto fail; 100 be32_to_cpus(&header.magic); 101 be32_to_cpus(&header.version); 102 be64_to_cpus(&header.backing_file_offset); 103 be32_to_cpus(&header.backing_file_size); 104 be32_to_cpus(&header.mtime); 105 be64_to_cpus(&header.size); 106 be32_to_cpus(&header.crypt_method); 107 be64_to_cpus(&header.l1_table_offset); 108 109 if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION) 110 goto fail; 111 if (header.size <= 1 || header.cluster_bits < 9) 112 goto fail; 113 if (header.crypt_method > QCOW_CRYPT_AES) 114 goto fail; 115 s->crypt_method_header = header.crypt_method; 116 if (s->crypt_method_header) 117 bs->encrypted = 1; 118 s->cluster_bits = header.cluster_bits; 119 s->cluster_size = 1 << s->cluster_bits; 120 s->cluster_sectors = 1 << (s->cluster_bits - 9); 121 s->l2_bits = header.l2_bits; 122 s->l2_size = 1 << s->l2_bits; 123 bs->total_sectors = header.size / 512; 124 s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1; 125 126 /* read the level 1 table */ 127 shift = s->cluster_bits + s->l2_bits; 128 s->l1_size = (header.size + (1LL << shift) - 1) >> shift; 129 130 s->l1_table_offset = header.l1_table_offset; 131 s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t)); 132 if (!s->l1_table) 133 goto fail; 134 if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) != 135 s->l1_size * sizeof(uint64_t)) 136 goto fail; 137 for(i = 0;i < s->l1_size; i++) { 138 be64_to_cpus(&s->l1_table[i]); 139 } 140 /* alloc L2 cache */ 141 s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); 142 if (!s->l2_cache) 143 goto fail; 144 s->cluster_cache = qemu_malloc(s->cluster_size); 145 if (!s->cluster_cache) 146 goto fail; 147 s->cluster_data = qemu_malloc(s->cluster_size); 148 if (!s->cluster_data) 149 goto fail; 150 s->cluster_cache_offset = -1; 151 152 /* read the backing file name */ 153 if (header.backing_file_offset != 0) { 154 len = header.backing_file_size; 155 if (len > 1023) 156 len = 1023; 157 if (bdrv_pread(bs->file, header.backing_file_offset, bs->backing_file, len) != len) 158 goto fail; 159 bs->backing_file[len] = '\0'; 160 } 161 return 0; 162 163 fail: 164 qemu_free(s->l1_table); 165 qemu_free(s->l2_cache); 166 qemu_free(s->cluster_cache); 167 qemu_free(s->cluster_data); 168 return -1; 169 } 170 171 static int qcow_set_key(BlockDriverState *bs, const char *key) 172 { 173 BDRVQcowState *s = bs->opaque; 174 uint8_t keybuf[16]; 175 int len, i; 176 177 memset(keybuf, 0, 16); 178 len = strlen(key); 179 if (len > 16) 180 len = 16; 181 /* XXX: we could compress the chars to 7 bits to increase 182 entropy */ 183 for(i = 0;i < len;i++) { 184 keybuf[i] = key[i]; 185 } 186 s->crypt_method = s->crypt_method_header; 187 188 if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) 189 return -1; 190 if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) 191 return -1; 192 #if 0 193 /* test */ 194 { 195 uint8_t in[16]; 196 uint8_t out[16]; 197 uint8_t tmp[16]; 198 for(i=0;i<16;i++) 199 in[i] = i; 200 AES_encrypt(in, tmp, &s->aes_encrypt_key); 201 AES_decrypt(tmp, out, &s->aes_decrypt_key); 202 for(i = 0; i < 16; i++) 203 printf(" %02x", tmp[i]); 204 printf("\n"); 205 for(i = 0; i < 16; i++) 206 printf(" %02x", out[i]); 207 printf("\n"); 208 } 209 #endif 210 return 0; 211 } 212 213 /* The crypt function is compatible with the linux cryptoloop 214 algorithm for < 4 GB images. NOTE: out_buf == in_buf is 215 supported */ 216 static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num, 217 uint8_t *out_buf, const uint8_t *in_buf, 218 int nb_sectors, int enc, 219 const AES_KEY *key) 220 { 221 union { 222 uint64_t ll[2]; 223 uint8_t b[16]; 224 } ivec; 225 int i; 226 227 for(i = 0; i < nb_sectors; i++) { 228 ivec.ll[0] = cpu_to_le64(sector_num); 229 ivec.ll[1] = 0; 230 AES_cbc_encrypt(in_buf, out_buf, 512, key, 231 ivec.b, enc); 232 sector_num++; 233 in_buf += 512; 234 out_buf += 512; 235 } 236 } 237 238 /* 'allocate' is: 239 * 240 * 0 to not allocate. 241 * 242 * 1 to allocate a normal cluster (for sector indexes 'n_start' to 243 * 'n_end') 244 * 245 * 2 to allocate a compressed cluster of size 246 * 'compressed_size'. 'compressed_size' must be > 0 and < 247 * cluster_size 248 * 249 * return 0 if not allocated. 250 */ 251 static uint64_t get_cluster_offset(BlockDriverState *bs, 252 uint64_t offset, int allocate, 253 int compressed_size, 254 int n_start, int n_end) 255 { 256 BDRVQcowState *s = bs->opaque; 257 int min_index, i, j, l1_index, l2_index; 258 uint64_t l2_offset, *l2_table, cluster_offset, tmp; 259 uint32_t min_count; 260 int new_l2_table; 261 262 l1_index = offset >> (s->l2_bits + s->cluster_bits); 263 l2_offset = s->l1_table[l1_index]; 264 new_l2_table = 0; 265 if (!l2_offset) { 266 if (!allocate) 267 return 0; 268 /* allocate a new l2 entry */ 269 l2_offset = bdrv_getlength(bs->file); 270 /* round to cluster size */ 271 l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); 272 /* update the L1 entry */ 273 s->l1_table[l1_index] = l2_offset; 274 tmp = cpu_to_be64(l2_offset); 275 if (bdrv_pwrite_sync(bs->file, 276 s->l1_table_offset + l1_index * sizeof(tmp), 277 &tmp, sizeof(tmp)) < 0) 278 return 0; 279 new_l2_table = 1; 280 } 281 for(i = 0; i < L2_CACHE_SIZE; i++) { 282 if (l2_offset == s->l2_cache_offsets[i]) { 283 /* increment the hit count */ 284 if (++s->l2_cache_counts[i] == 0xffffffff) { 285 for(j = 0; j < L2_CACHE_SIZE; j++) { 286 s->l2_cache_counts[j] >>= 1; 287 } 288 } 289 l2_table = s->l2_cache + (i << s->l2_bits); 290 goto found; 291 } 292 } 293 /* not found: load a new entry in the least used one */ 294 min_index = 0; 295 min_count = 0xffffffff; 296 for(i = 0; i < L2_CACHE_SIZE; i++) { 297 if (s->l2_cache_counts[i] < min_count) { 298 min_count = s->l2_cache_counts[i]; 299 min_index = i; 300 } 301 } 302 l2_table = s->l2_cache + (min_index << s->l2_bits); 303 if (new_l2_table) { 304 memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); 305 if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table, 306 s->l2_size * sizeof(uint64_t)) < 0) 307 return 0; 308 } else { 309 if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != 310 s->l2_size * sizeof(uint64_t)) 311 return 0; 312 } 313 s->l2_cache_offsets[min_index] = l2_offset; 314 s->l2_cache_counts[min_index] = 1; 315 found: 316 l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); 317 cluster_offset = be64_to_cpu(l2_table[l2_index]); 318 if (!cluster_offset || 319 ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) { 320 if (!allocate) 321 return 0; 322 /* allocate a new cluster */ 323 if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && 324 (n_end - n_start) < s->cluster_sectors) { 325 /* if the cluster is already compressed, we must 326 decompress it in the case it is not completely 327 overwritten */ 328 if (decompress_cluster(bs, cluster_offset) < 0) 329 return 0; 330 cluster_offset = bdrv_getlength(bs->file); 331 cluster_offset = (cluster_offset + s->cluster_size - 1) & 332 ~(s->cluster_size - 1); 333 /* write the cluster content */ 334 if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) != 335 s->cluster_size) 336 return -1; 337 } else { 338 cluster_offset = bdrv_getlength(bs->file); 339 if (allocate == 1) { 340 /* round to cluster size */ 341 cluster_offset = (cluster_offset + s->cluster_size - 1) & 342 ~(s->cluster_size - 1); 343 bdrv_truncate(bs->file, cluster_offset + s->cluster_size); 344 /* if encrypted, we must initialize the cluster 345 content which won't be written */ 346 if (s->crypt_method && 347 (n_end - n_start) < s->cluster_sectors) { 348 uint64_t start_sect; 349 start_sect = (offset & ~(s->cluster_size - 1)) >> 9; 350 memset(s->cluster_data + 512, 0x00, 512); 351 for(i = 0; i < s->cluster_sectors; i++) { 352 if (i < n_start || i >= n_end) { 353 encrypt_sectors(s, start_sect + i, 354 s->cluster_data, 355 s->cluster_data + 512, 1, 1, 356 &s->aes_encrypt_key); 357 if (bdrv_pwrite(bs->file, cluster_offset + i * 512, 358 s->cluster_data, 512) != 512) 359 return -1; 360 } 361 } 362 } 363 } else if (allocate == 2) { 364 cluster_offset |= QCOW_OFLAG_COMPRESSED | 365 (uint64_t)compressed_size << (63 - s->cluster_bits); 366 } 367 } 368 /* update L2 table */ 369 tmp = cpu_to_be64(cluster_offset); 370 l2_table[l2_index] = tmp; 371 if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp), 372 &tmp, sizeof(tmp)) < 0) 373 return 0; 374 } 375 return cluster_offset; 376 } 377 378 static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num, 379 int nb_sectors, int *pnum) 380 { 381 BDRVQcowState *s = bs->opaque; 382 int index_in_cluster, n; 383 uint64_t cluster_offset; 384 385 cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); 386 index_in_cluster = sector_num & (s->cluster_sectors - 1); 387 n = s->cluster_sectors - index_in_cluster; 388 if (n > nb_sectors) 389 n = nb_sectors; 390 *pnum = n; 391 return (cluster_offset != 0); 392 } 393 394 static int decompress_buffer(uint8_t *out_buf, int out_buf_size, 395 const uint8_t *buf, int buf_size) 396 { 397 z_stream strm1, *strm = &strm1; 398 int ret, out_len; 399 400 memset(strm, 0, sizeof(*strm)); 401 402 strm->next_in = (uint8_t *)buf; 403 strm->avail_in = buf_size; 404 strm->next_out = out_buf; 405 strm->avail_out = out_buf_size; 406 407 ret = inflateInit2(strm, -12); 408 if (ret != Z_OK) 409 return -1; 410 ret = inflate(strm, Z_FINISH); 411 out_len = strm->next_out - out_buf; 412 if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || 413 out_len != out_buf_size) { 414 inflateEnd(strm); 415 return -1; 416 } 417 inflateEnd(strm); 418 return 0; 419 } 420 421 static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) 422 { 423 BDRVQcowState *s = bs->opaque; 424 int ret, csize; 425 uint64_t coffset; 426 427 coffset = cluster_offset & s->cluster_offset_mask; 428 if (s->cluster_cache_offset != coffset) { 429 csize = cluster_offset >> (63 - s->cluster_bits); 430 csize &= (s->cluster_size - 1); 431 ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize); 432 if (ret != csize) 433 return -1; 434 if (decompress_buffer(s->cluster_cache, s->cluster_size, 435 s->cluster_data, csize) < 0) { 436 return -1; 437 } 438 s->cluster_cache_offset = coffset; 439 } 440 return 0; 441 } 442 443 #if 0 444 445 static int qcow_read(BlockDriverState *bs, int64_t sector_num, 446 uint8_t *buf, int nb_sectors) 447 { 448 BDRVQcowState *s = bs->opaque; 449 int ret, index_in_cluster, n; 450 uint64_t cluster_offset; 451 452 while (nb_sectors > 0) { 453 cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); 454 index_in_cluster = sector_num & (s->cluster_sectors - 1); 455 n = s->cluster_sectors - index_in_cluster; 456 if (n > nb_sectors) 457 n = nb_sectors; 458 if (!cluster_offset) { 459 if (bs->backing_hd) { 460 /* read from the base image */ 461 ret = bdrv_read(bs->backing_hd, sector_num, buf, n); 462 if (ret < 0) 463 return -1; 464 } else { 465 memset(buf, 0, 512 * n); 466 } 467 } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { 468 if (decompress_cluster(bs, cluster_offset) < 0) 469 return -1; 470 memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n); 471 } else { 472 ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512); 473 if (ret != n * 512) 474 return -1; 475 if (s->crypt_method) { 476 encrypt_sectors(s, sector_num, buf, buf, n, 0, 477 &s->aes_decrypt_key); 478 } 479 } 480 nb_sectors -= n; 481 sector_num += n; 482 buf += n * 512; 483 } 484 return 0; 485 } 486 #endif 487 488 typedef struct QCowAIOCB { 489 BlockDriverAIOCB common; 490 int64_t sector_num; 491 QEMUIOVector *qiov; 492 uint8_t *buf; 493 void *orig_buf; 494 int nb_sectors; 495 int n; 496 uint64_t cluster_offset; 497 uint8_t *cluster_data; 498 struct iovec hd_iov; 499 bool is_write; 500 QEMUBH *bh; 501 QEMUIOVector hd_qiov; 502 BlockDriverAIOCB *hd_aiocb; 503 } QCowAIOCB; 504 505 static void qcow_aio_cancel(BlockDriverAIOCB *blockacb) 506 { 507 QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common); 508 if (acb->hd_aiocb) 509 bdrv_aio_cancel(acb->hd_aiocb); 510 qemu_aio_release(acb); 511 } 512 513 static AIOPool qcow_aio_pool = { 514 .aiocb_size = sizeof(QCowAIOCB), 515 .cancel = qcow_aio_cancel, 516 }; 517 518 static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, 519 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 520 BlockDriverCompletionFunc *cb, void *opaque, int is_write) 521 { 522 QCowAIOCB *acb; 523 524 acb = qemu_aio_get(&qcow_aio_pool, bs, cb, opaque); 525 if (!acb) 526 return NULL; 527 acb->hd_aiocb = NULL; 528 acb->sector_num = sector_num; 529 acb->qiov = qiov; 530 acb->is_write = is_write; 531 532 if (qiov->niov > 1) { 533 acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size); 534 if (is_write) 535 qemu_iovec_to_buffer(qiov, acb->buf); 536 } else { 537 acb->buf = (uint8_t *)qiov->iov->iov_base; 538 } 539 acb->nb_sectors = nb_sectors; 540 acb->n = 0; 541 acb->cluster_offset = 0; 542 return acb; 543 } 544 545 static void qcow_aio_read_cb(void *opaque, int ret); 546 static void qcow_aio_write_cb(void *opaque, int ret); 547 548 static void qcow_aio_rw_bh(void *opaque) 549 { 550 QCowAIOCB *acb = opaque; 551 qemu_bh_delete(acb->bh); 552 acb->bh = NULL; 553 554 if (acb->is_write) { 555 qcow_aio_write_cb(opaque, 0); 556 } else { 557 qcow_aio_read_cb(opaque, 0); 558 } 559 } 560 561 static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb) 562 { 563 if (acb->bh) { 564 return -EIO; 565 } 566 567 acb->bh = qemu_bh_new(cb, acb); 568 if (!acb->bh) { 569 return -EIO; 570 } 571 572 qemu_bh_schedule(acb->bh); 573 574 return 0; 575 } 576 577 static void qcow_aio_read_cb(void *opaque, int ret) 578 { 579 QCowAIOCB *acb = opaque; 580 BlockDriverState *bs = acb->common.bs; 581 BDRVQcowState *s = bs->opaque; 582 int index_in_cluster; 583 584 acb->hd_aiocb = NULL; 585 if (ret < 0) 586 goto done; 587 588 redo: 589 /* post process the read buffer */ 590 if (!acb->cluster_offset) { 591 /* nothing to do */ 592 } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { 593 /* nothing to do */ 594 } else { 595 if (s->crypt_method) { 596 encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf, 597 acb->n, 0, 598 &s->aes_decrypt_key); 599 } 600 } 601 602 acb->nb_sectors -= acb->n; 603 acb->sector_num += acb->n; 604 acb->buf += acb->n * 512; 605 606 if (acb->nb_sectors == 0) { 607 /* request completed */ 608 ret = 0; 609 goto done; 610 } 611 612 /* prepare next AIO request */ 613 acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, 614 0, 0, 0, 0); 615 index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); 616 acb->n = s->cluster_sectors - index_in_cluster; 617 if (acb->n > acb->nb_sectors) 618 acb->n = acb->nb_sectors; 619 620 if (!acb->cluster_offset) { 621 if (bs->backing_hd) { 622 /* read from the base image */ 623 acb->hd_iov.iov_base = (void *)acb->buf; 624 acb->hd_iov.iov_len = acb->n * 512; 625 qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); 626 acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num, 627 &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); 628 if (acb->hd_aiocb == NULL) { 629 ret = -EIO; 630 goto done; 631 } 632 } else { 633 /* Note: in this case, no need to wait */ 634 memset(acb->buf, 0, 512 * acb->n); 635 goto redo; 636 } 637 } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { 638 /* add AIO support for compressed blocks ? */ 639 if (decompress_cluster(bs, acb->cluster_offset) < 0) { 640 ret = -EIO; 641 goto done; 642 } 643 memcpy(acb->buf, 644 s->cluster_cache + index_in_cluster * 512, 512 * acb->n); 645 goto redo; 646 } else { 647 if ((acb->cluster_offset & 511) != 0) { 648 ret = -EIO; 649 goto done; 650 } 651 acb->hd_iov.iov_base = (void *)acb->buf; 652 acb->hd_iov.iov_len = acb->n * 512; 653 qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); 654 acb->hd_aiocb = bdrv_aio_readv(bs->file, 655 (acb->cluster_offset >> 9) + index_in_cluster, 656 &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); 657 if (acb->hd_aiocb == NULL) { 658 ret = -EIO; 659 goto done; 660 } 661 } 662 663 return; 664 665 done: 666 if (acb->qiov->niov > 1) { 667 qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); 668 qemu_vfree(acb->orig_buf); 669 } 670 acb->common.cb(acb->common.opaque, ret); 671 qemu_aio_release(acb); 672 } 673 674 static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs, 675 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 676 BlockDriverCompletionFunc *cb, void *opaque) 677 { 678 QCowAIOCB *acb; 679 int ret; 680 681 acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); 682 if (!acb) 683 return NULL; 684 685 ret = qcow_schedule_bh(qcow_aio_rw_bh, acb); 686 if (ret < 0) { 687 if (acb->qiov->niov > 1) { 688 qemu_vfree(acb->orig_buf); 689 } 690 qemu_aio_release(acb); 691 return NULL; 692 } 693 694 return &acb->common; 695 } 696 697 static void qcow_aio_write_cb(void *opaque, int ret) 698 { 699 QCowAIOCB *acb = opaque; 700 BlockDriverState *bs = acb->common.bs; 701 BDRVQcowState *s = bs->opaque; 702 int index_in_cluster; 703 uint64_t cluster_offset; 704 const uint8_t *src_buf; 705 706 acb->hd_aiocb = NULL; 707 708 if (ret < 0) 709 goto done; 710 711 acb->nb_sectors -= acb->n; 712 acb->sector_num += acb->n; 713 acb->buf += acb->n * 512; 714 715 if (acb->nb_sectors == 0) { 716 /* request completed */ 717 ret = 0; 718 goto done; 719 } 720 721 index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); 722 acb->n = s->cluster_sectors - index_in_cluster; 723 if (acb->n > acb->nb_sectors) 724 acb->n = acb->nb_sectors; 725 cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, 1, 0, 726 index_in_cluster, 727 index_in_cluster + acb->n); 728 if (!cluster_offset || (cluster_offset & 511) != 0) { 729 ret = -EIO; 730 goto done; 731 } 732 if (s->crypt_method) { 733 if (!acb->cluster_data) { 734 acb->cluster_data = qemu_mallocz(s->cluster_size); 735 if (!acb->cluster_data) { 736 ret = -ENOMEM; 737 goto done; 738 } 739 } 740 encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf, 741 acb->n, 1, &s->aes_encrypt_key); 742 src_buf = acb->cluster_data; 743 } else { 744 src_buf = acb->buf; 745 } 746 747 acb->hd_iov.iov_base = (void *)src_buf; 748 acb->hd_iov.iov_len = acb->n * 512; 749 qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); 750 acb->hd_aiocb = bdrv_aio_writev(bs->file, 751 (cluster_offset >> 9) + index_in_cluster, 752 &acb->hd_qiov, acb->n, 753 qcow_aio_write_cb, acb); 754 if (acb->hd_aiocb == NULL) { 755 ret = -EIO; 756 goto done; 757 } 758 return; 759 760 done: 761 if (acb->qiov->niov > 1) 762 qemu_vfree(acb->orig_buf); 763 acb->common.cb(acb->common.opaque, ret); 764 qemu_aio_release(acb); 765 } 766 767 static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, 768 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 769 BlockDriverCompletionFunc *cb, void *opaque) 770 { 771 BDRVQcowState *s = bs->opaque; 772 QCowAIOCB *acb; 773 int ret; 774 775 s->cluster_cache_offset = -1; /* disable compressed cache */ 776 777 acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); 778 if (!acb) 779 return NULL; 780 781 782 ret = qcow_schedule_bh(qcow_aio_rw_bh, acb); 783 if (ret < 0) { 784 if (acb->qiov->niov > 1) { 785 qemu_vfree(acb->orig_buf); 786 } 787 qemu_aio_release(acb); 788 return NULL; 789 } 790 791 return &acb->common; 792 } 793 794 static void qcow_close(BlockDriverState *bs) 795 { 796 BDRVQcowState *s = bs->opaque; 797 qemu_free(s->l1_table); 798 qemu_free(s->l2_cache); 799 qemu_free(s->cluster_cache); 800 qemu_free(s->cluster_data); 801 } 802 803 static int qcow_create(const char *filename, QEMUOptionParameter *options) 804 { 805 int fd, header_size, backing_filename_len, l1_size, i, shift; 806 QCowHeader header; 807 uint64_t tmp; 808 int64_t total_size = 0; 809 const char *backing_file = NULL; 810 int flags = 0; 811 int ret; 812 813 /* Read out options */ 814 while (options && options->name) { 815 if (!strcmp(options->name, BLOCK_OPT_SIZE)) { 816 total_size = options->value.n / 512; 817 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { 818 backing_file = options->value.s; 819 } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) { 820 flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0; 821 } 822 options++; 823 } 824 825 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); 826 if (fd < 0) 827 return -errno; 828 memset(&header, 0, sizeof(header)); 829 header.magic = cpu_to_be32(QCOW_MAGIC); 830 header.version = cpu_to_be32(QCOW_VERSION); 831 header.size = cpu_to_be64(total_size * 512); 832 header_size = sizeof(header); 833 backing_filename_len = 0; 834 if (backing_file) { 835 if (strcmp(backing_file, "fat:")) { 836 header.backing_file_offset = cpu_to_be64(header_size); 837 backing_filename_len = strlen(backing_file); 838 header.backing_file_size = cpu_to_be32(backing_filename_len); 839 header_size += backing_filename_len; 840 } else { 841 /* special backing file for vvfat */ 842 backing_file = NULL; 843 } 844 header.cluster_bits = 9; /* 512 byte cluster to avoid copying 845 unmodifyed sectors */ 846 header.l2_bits = 12; /* 32 KB L2 tables */ 847 } else { 848 header.cluster_bits = 12; /* 4 KB clusters */ 849 header.l2_bits = 9; /* 4 KB L2 tables */ 850 } 851 header_size = (header_size + 7) & ~7; 852 shift = header.cluster_bits + header.l2_bits; 853 l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift; 854 855 header.l1_table_offset = cpu_to_be64(header_size); 856 if (flags & BLOCK_FLAG_ENCRYPT) { 857 header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); 858 } else { 859 header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); 860 } 861 862 /* write all the data */ 863 ret = qemu_write_full(fd, &header, sizeof(header)); 864 if (ret != sizeof(header)) { 865 ret = -errno; 866 goto exit; 867 } 868 869 if (backing_file) { 870 ret = qemu_write_full(fd, backing_file, backing_filename_len); 871 if (ret != backing_filename_len) { 872 ret = -errno; 873 goto exit; 874 } 875 876 } 877 lseek(fd, header_size, SEEK_SET); 878 tmp = 0; 879 for(i = 0;i < l1_size; i++) { 880 ret = qemu_write_full(fd, &tmp, sizeof(tmp)); 881 if (ret != sizeof(tmp)) { 882 ret = -errno; 883 goto exit; 884 } 885 } 886 887 ret = 0; 888 exit: 889 close(fd); 890 return ret; 891 } 892 893 static int qcow_make_empty(BlockDriverState *bs) 894 { 895 BDRVQcowState *s = bs->opaque; 896 uint32_t l1_length = s->l1_size * sizeof(uint64_t); 897 int ret; 898 899 memset(s->l1_table, 0, l1_length); 900 if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table, 901 l1_length) < 0) 902 return -1; 903 ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length); 904 if (ret < 0) 905 return ret; 906 907 memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); 908 memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); 909 memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); 910 911 return 0; 912 } 913 914 /* XXX: put compressed sectors first, then all the cluster aligned 915 tables to avoid losing bytes in alignment */ 916 static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, 917 const uint8_t *buf, int nb_sectors) 918 { 919 BDRVQcowState *s = bs->opaque; 920 z_stream strm; 921 int ret, out_len; 922 uint8_t *out_buf; 923 uint64_t cluster_offset; 924 925 if (nb_sectors != s->cluster_sectors) 926 return -EINVAL; 927 928 out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); 929 if (!out_buf) 930 return -1; 931 932 /* best compression, small window, no zlib header */ 933 memset(&strm, 0, sizeof(strm)); 934 ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, 935 Z_DEFLATED, -12, 936 9, Z_DEFAULT_STRATEGY); 937 if (ret != 0) { 938 qemu_free(out_buf); 939 return -1; 940 } 941 942 strm.avail_in = s->cluster_size; 943 strm.next_in = (uint8_t *)buf; 944 strm.avail_out = s->cluster_size; 945 strm.next_out = out_buf; 946 947 ret = deflate(&strm, Z_FINISH); 948 if (ret != Z_STREAM_END && ret != Z_OK) { 949 qemu_free(out_buf); 950 deflateEnd(&strm); 951 return -1; 952 } 953 out_len = strm.next_out - out_buf; 954 955 deflateEnd(&strm); 956 957 if (ret != Z_STREAM_END || out_len >= s->cluster_size) { 958 /* could not compress: write normal cluster */ 959 bdrv_write(bs, sector_num, buf, s->cluster_sectors); 960 } else { 961 cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, 962 out_len, 0, 0); 963 cluster_offset &= s->cluster_offset_mask; 964 if (bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len) != out_len) { 965 qemu_free(out_buf); 966 return -1; 967 } 968 } 969 970 qemu_free(out_buf); 971 return 0; 972 } 973 974 static int qcow_flush(BlockDriverState *bs) 975 { 976 return bdrv_flush(bs->file); 977 } 978 979 static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs, 980 BlockDriverCompletionFunc *cb, void *opaque) 981 { 982 return bdrv_aio_flush(bs->file, cb, opaque); 983 } 984 985 static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 986 { 987 BDRVQcowState *s = bs->opaque; 988 bdi->cluster_size = s->cluster_size; 989 return 0; 990 } 991 992 993 static QEMUOptionParameter qcow_create_options[] = { 994 { 995 .name = BLOCK_OPT_SIZE, 996 .type = OPT_SIZE, 997 .help = "Virtual disk size" 998 }, 999 { 1000 .name = BLOCK_OPT_BACKING_FILE, 1001 .type = OPT_STRING, 1002 .help = "File name of a base image" 1003 }, 1004 { 1005 .name = BLOCK_OPT_ENCRYPT, 1006 .type = OPT_FLAG, 1007 .help = "Encrypt the image" 1008 }, 1009 { NULL } 1010 }; 1011 1012 static BlockDriver bdrv_qcow = { 1013 .format_name = "qcow", 1014 .instance_size = sizeof(BDRVQcowState), 1015 .bdrv_probe = qcow_probe, 1016 .bdrv_open = qcow_open, 1017 .bdrv_close = qcow_close, 1018 .bdrv_create = qcow_create, 1019 .bdrv_flush = qcow_flush, 1020 .bdrv_is_allocated = qcow_is_allocated, 1021 .bdrv_set_key = qcow_set_key, 1022 .bdrv_make_empty = qcow_make_empty, 1023 .bdrv_aio_readv = qcow_aio_readv, 1024 .bdrv_aio_writev = qcow_aio_writev, 1025 .bdrv_aio_flush = qcow_aio_flush, 1026 .bdrv_write_compressed = qcow_write_compressed, 1027 .bdrv_get_info = qcow_get_info, 1028 1029 .create_options = qcow_create_options, 1030 }; 1031 1032 static void bdrv_qcow_init(void) 1033 { 1034 bdrv_register(&bdrv_qcow); 1035 } 1036 1037 block_init(bdrv_qcow_init); 1038