1 /* 2 * Block driver for the QCOW version 2 format 3 * 4 * Copyright (c) 2004-2006 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 
23 */ 24 25 #include "qemu/osdep.h" 26 #include <zlib.h> 27 28 #include "qapi/error.h" 29 #include "qcow2.h" 30 #include "qemu/bswap.h" 31 #include "qemu/memalign.h" 32 #include "trace.h" 33 34 int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size) 35 { 36 BDRVQcow2State *s = bs->opaque; 37 int new_l1_size, i, ret; 38 39 if (exact_size >= s->l1_size) { 40 return 0; 41 } 42 43 new_l1_size = exact_size; 44 45 #ifdef DEBUG_ALLOC2 46 fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size); 47 #endif 48 49 BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE); 50 ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset + 51 new_l1_size * L1E_SIZE, 52 (s->l1_size - new_l1_size) * L1E_SIZE, 0); 53 if (ret < 0) { 54 goto fail; 55 } 56 57 ret = bdrv_flush(bs->file->bs); 58 if (ret < 0) { 59 goto fail; 60 } 61 62 BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS); 63 for (i = s->l1_size - 1; i > new_l1_size - 1; i--) { 64 if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) { 65 continue; 66 } 67 qcow2_free_clusters(bs, s->l1_table[i] & L1E_OFFSET_MASK, 68 s->cluster_size, QCOW2_DISCARD_ALWAYS); 69 s->l1_table[i] = 0; 70 } 71 return 0; 72 73 fail: 74 /* 75 * If the write in the l1_table failed the image may contain a partially 76 * overwritten l1_table. In this case it would be better to clear the 77 * l1_table in memory to avoid possible image corruption. 
78 */ 79 memset(s->l1_table + new_l1_size, 0, 80 (s->l1_size - new_l1_size) * L1E_SIZE); 81 return ret; 82 } 83 84 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, 85 bool exact_size) 86 { 87 BDRVQcow2State *s = bs->opaque; 88 int new_l1_size2, ret, i; 89 uint64_t *new_l1_table; 90 int64_t old_l1_table_offset, old_l1_size; 91 int64_t new_l1_table_offset, new_l1_size; 92 uint8_t data[12]; 93 94 if (min_size <= s->l1_size) 95 return 0; 96 97 /* Do a sanity check on min_size before trying to calculate new_l1_size 98 * (this prevents overflows during the while loop for the calculation of 99 * new_l1_size) */ 100 if (min_size > INT_MAX / L1E_SIZE) { 101 return -EFBIG; 102 } 103 104 if (exact_size) { 105 new_l1_size = min_size; 106 } else { 107 /* Bump size up to reduce the number of times we have to grow */ 108 new_l1_size = s->l1_size; 109 if (new_l1_size == 0) { 110 new_l1_size = 1; 111 } 112 while (min_size > new_l1_size) { 113 new_l1_size = DIV_ROUND_UP(new_l1_size * 3, 2); 114 } 115 } 116 117 QEMU_BUILD_BUG_ON(QCOW_MAX_L1_SIZE > INT_MAX); 118 if (new_l1_size > QCOW_MAX_L1_SIZE / L1E_SIZE) { 119 return -EFBIG; 120 } 121 122 #ifdef DEBUG_ALLOC2 123 fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n", 124 s->l1_size, new_l1_size); 125 #endif 126 127 new_l1_size2 = L1E_SIZE * new_l1_size; 128 new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_size2); 129 if (new_l1_table == NULL) { 130 return -ENOMEM; 131 } 132 memset(new_l1_table, 0, new_l1_size2); 133 134 if (s->l1_size) { 135 memcpy(new_l1_table, s->l1_table, s->l1_size * L1E_SIZE); 136 } 137 138 /* write new table (align to cluster) */ 139 BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE); 140 new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2); 141 if (new_l1_table_offset < 0) { 142 qemu_vfree(new_l1_table); 143 return new_l1_table_offset; 144 } 145 146 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 147 if (ret < 0) { 148 goto fail; 149 } 150 151 /* the L1 position has 
not yet been updated, so these clusters must 152 * indeed be completely free */ 153 ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset, 154 new_l1_size2, false); 155 if (ret < 0) { 156 goto fail; 157 } 158 159 BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE); 160 for(i = 0; i < s->l1_size; i++) 161 new_l1_table[i] = cpu_to_be64(new_l1_table[i]); 162 ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_size2, 163 new_l1_table, 0); 164 if (ret < 0) 165 goto fail; 166 for(i = 0; i < s->l1_size; i++) 167 new_l1_table[i] = be64_to_cpu(new_l1_table[i]); 168 169 /* set new table */ 170 BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE); 171 stl_be_p(data, new_l1_size); 172 stq_be_p(data + 4, new_l1_table_offset); 173 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), 174 sizeof(data), data, 0); 175 if (ret < 0) { 176 goto fail; 177 } 178 qemu_vfree(s->l1_table); 179 old_l1_table_offset = s->l1_table_offset; 180 s->l1_table_offset = new_l1_table_offset; 181 s->l1_table = new_l1_table; 182 old_l1_size = s->l1_size; 183 s->l1_size = new_l1_size; 184 qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * L1E_SIZE, 185 QCOW2_DISCARD_OTHER); 186 return 0; 187 fail: 188 qemu_vfree(new_l1_table); 189 qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2, 190 QCOW2_DISCARD_OTHER); 191 return ret; 192 } 193 194 /* 195 * l2_load 196 * 197 * @bs: The BlockDriverState 198 * @offset: A guest offset, used to calculate what slice of the L2 199 * table to load. 200 * @l2_offset: Offset to the L2 table in the image file. 201 * @l2_slice: Location to store the pointer to the L2 slice. 202 * 203 * Loads a L2 slice into memory (L2 slices are the parts of L2 tables 204 * that are loaded by the qcow2 cache). If the slice is in the cache, 205 * the cache is used; otherwise the L2 slice is loaded from the image 206 * file. 
207 */ 208 static int l2_load(BlockDriverState *bs, uint64_t offset, 209 uint64_t l2_offset, uint64_t **l2_slice) 210 { 211 BDRVQcow2State *s = bs->opaque; 212 int start_of_slice = l2_entry_size(s) * 213 (offset_to_l2_index(s, offset) - offset_to_l2_slice_index(s, offset)); 214 215 return qcow2_cache_get(bs, s->l2_table_cache, l2_offset + start_of_slice, 216 (void **)l2_slice); 217 } 218 219 /* 220 * Writes an L1 entry to disk (note that depending on the alignment 221 * requirements this function may write more that just one entry in 222 * order to prevent bdrv_pwrite from performing a read-modify-write) 223 */ 224 int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index) 225 { 226 BDRVQcow2State *s = bs->opaque; 227 int l1_start_index; 228 int i, ret; 229 int bufsize = MAX(L1E_SIZE, 230 MIN(bs->file->bs->bl.request_alignment, s->cluster_size)); 231 int nentries = bufsize / L1E_SIZE; 232 g_autofree uint64_t *buf = g_try_new0(uint64_t, nentries); 233 234 if (buf == NULL) { 235 return -ENOMEM; 236 } 237 238 l1_start_index = QEMU_ALIGN_DOWN(l1_index, nentries); 239 for (i = 0; i < MIN(nentries, s->l1_size - l1_start_index); i++) { 240 buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]); 241 } 242 243 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1, 244 s->l1_table_offset + L1E_SIZE * l1_start_index, bufsize, false); 245 if (ret < 0) { 246 return ret; 247 } 248 249 BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 250 ret = bdrv_pwrite_sync(bs->file, 251 s->l1_table_offset + L1E_SIZE * l1_start_index, 252 bufsize, buf, 0); 253 if (ret < 0) { 254 return ret; 255 } 256 257 return 0; 258 } 259 260 /* 261 * l2_allocate 262 * 263 * Allocate a new l2 entry in the file. If l1_index points to an already 264 * used entry in the L2 table (i.e. we are doing a copy on write for the L2 265 * table) copy the contents of the old L2 table into the newly allocated one. 266 * Otherwise the new table is initialized with zeros. 
267 * 268 */ 269 270 static int l2_allocate(BlockDriverState *bs, int l1_index) 271 { 272 BDRVQcow2State *s = bs->opaque; 273 uint64_t old_l2_offset; 274 uint64_t *l2_slice = NULL; 275 unsigned slice, slice_size2, n_slices; 276 int64_t l2_offset; 277 int ret; 278 279 old_l2_offset = s->l1_table[l1_index]; 280 281 trace_qcow2_l2_allocate(bs, l1_index); 282 283 /* allocate a new l2 entry */ 284 285 l2_offset = qcow2_alloc_clusters(bs, s->l2_size * l2_entry_size(s)); 286 if (l2_offset < 0) { 287 ret = l2_offset; 288 goto fail; 289 } 290 291 /* The offset must fit in the offset field of the L1 table entry */ 292 assert((l2_offset & L1E_OFFSET_MASK) == l2_offset); 293 294 /* If we're allocating the table at offset 0 then something is wrong */ 295 if (l2_offset == 0) { 296 qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid " 297 "allocation of L2 table at offset 0"); 298 ret = -EIO; 299 goto fail; 300 } 301 302 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 303 if (ret < 0) { 304 goto fail; 305 } 306 307 /* allocate a new entry in the l2 cache */ 308 309 slice_size2 = s->l2_slice_size * l2_entry_size(s); 310 n_slices = s->cluster_size / slice_size2; 311 312 trace_qcow2_l2_allocate_get_empty(bs, l1_index); 313 for (slice = 0; slice < n_slices; slice++) { 314 ret = qcow2_cache_get_empty(bs, s->l2_table_cache, 315 l2_offset + slice * slice_size2, 316 (void **) &l2_slice); 317 if (ret < 0) { 318 goto fail; 319 } 320 321 if ((old_l2_offset & L1E_OFFSET_MASK) == 0) { 322 /* if there was no old l2 table, clear the new slice */ 323 memset(l2_slice, 0, slice_size2); 324 } else { 325 uint64_t *old_slice; 326 uint64_t old_l2_slice_offset = 327 (old_l2_offset & L1E_OFFSET_MASK) + slice * slice_size2; 328 329 /* if there was an old l2 table, read a slice from the disk */ 330 BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); 331 ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_slice_offset, 332 (void **) &old_slice); 333 if (ret < 0) { 334 goto fail; 335 } 336 337 
memcpy(l2_slice, old_slice, slice_size2); 338 339 qcow2_cache_put(s->l2_table_cache, (void **) &old_slice); 340 } 341 342 /* write the l2 slice to the file */ 343 BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); 344 345 trace_qcow2_l2_allocate_write_l2(bs, l1_index); 346 qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 347 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 348 } 349 350 ret = qcow2_cache_flush(bs, s->l2_table_cache); 351 if (ret < 0) { 352 goto fail; 353 } 354 355 /* update the L1 entry */ 356 trace_qcow2_l2_allocate_write_l1(bs, l1_index); 357 s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; 358 ret = qcow2_write_l1_entry(bs, l1_index); 359 if (ret < 0) { 360 goto fail; 361 } 362 363 trace_qcow2_l2_allocate_done(bs, l1_index, 0); 364 return 0; 365 366 fail: 367 trace_qcow2_l2_allocate_done(bs, l1_index, ret); 368 if (l2_slice != NULL) { 369 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 370 } 371 s->l1_table[l1_index] = old_l2_offset; 372 if (l2_offset > 0) { 373 qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s), 374 QCOW2_DISCARD_ALWAYS); 375 } 376 return ret; 377 } 378 379 /* 380 * For a given L2 entry, count the number of contiguous subclusters of 381 * the same type starting from @sc_from. Compressed clusters are 382 * treated as if they were divided into subclusters of size 383 * s->subcluster_size. 384 * 385 * Return the number of contiguous subclusters and set @type to the 386 * subcluster type. 387 * 388 * If the L2 entry is invalid return -errno and set @type to 389 * QCOW2_SUBCLUSTER_INVALID. 
390 */ 391 static int qcow2_get_subcluster_range_type(BlockDriverState *bs, 392 uint64_t l2_entry, 393 uint64_t l2_bitmap, 394 unsigned sc_from, 395 QCow2SubclusterType *type) 396 { 397 BDRVQcow2State *s = bs->opaque; 398 uint32_t val; 399 400 *type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_from); 401 402 if (*type == QCOW2_SUBCLUSTER_INVALID) { 403 return -EINVAL; 404 } else if (!has_subclusters(s) || *type == QCOW2_SUBCLUSTER_COMPRESSED) { 405 return s->subclusters_per_cluster - sc_from; 406 } 407 408 switch (*type) { 409 case QCOW2_SUBCLUSTER_NORMAL: 410 val = l2_bitmap | QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from); 411 return cto32(val) - sc_from; 412 413 case QCOW2_SUBCLUSTER_ZERO_PLAIN: 414 case QCOW2_SUBCLUSTER_ZERO_ALLOC: 415 val = (l2_bitmap | QCOW_OFLAG_SUB_ZERO_RANGE(0, sc_from)) >> 32; 416 return cto32(val) - sc_from; 417 418 case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: 419 case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 420 val = ((l2_bitmap >> 32) | l2_bitmap) 421 & ~QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from); 422 return ctz32(val) - sc_from; 423 424 default: 425 g_assert_not_reached(); 426 } 427 } 428 429 /* 430 * Return the number of contiguous subclusters of the exact same type 431 * in a given L2 slice, starting from cluster @l2_index, subcluster 432 * @sc_index. Allocated subclusters are required to be contiguous in 433 * the image file. 434 * At most @nb_clusters are checked (note that this means clusters, 435 * not subclusters). 436 * Compressed clusters are always processed one by one but for the 437 * purpose of this count they are treated as if they were divided into 438 * subclusters of size s->subcluster_size. 439 * On failure return -errno and update @l2_index to point to the 440 * invalid entry. 
441 */ 442 static int count_contiguous_subclusters(BlockDriverState *bs, int nb_clusters, 443 unsigned sc_index, uint64_t *l2_slice, 444 unsigned *l2_index) 445 { 446 BDRVQcow2State *s = bs->opaque; 447 int i, count = 0; 448 bool check_offset = false; 449 uint64_t expected_offset = 0; 450 QCow2SubclusterType expected_type = QCOW2_SUBCLUSTER_NORMAL, type; 451 452 assert(*l2_index + nb_clusters <= s->l2_slice_size); 453 454 for (i = 0; i < nb_clusters; i++) { 455 unsigned first_sc = (i == 0) ? sc_index : 0; 456 uint64_t l2_entry = get_l2_entry(s, l2_slice, *l2_index + i); 457 uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, *l2_index + i); 458 int ret = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap, 459 first_sc, &type); 460 if (ret < 0) { 461 *l2_index += i; /* Point to the invalid entry */ 462 return -EIO; 463 } 464 if (i == 0) { 465 if (type == QCOW2_SUBCLUSTER_COMPRESSED) { 466 /* Compressed clusters are always processed one by one */ 467 return ret; 468 } 469 expected_type = type; 470 expected_offset = l2_entry & L2E_OFFSET_MASK; 471 check_offset = (type == QCOW2_SUBCLUSTER_NORMAL || 472 type == QCOW2_SUBCLUSTER_ZERO_ALLOC || 473 type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC); 474 } else if (type != expected_type) { 475 break; 476 } else if (check_offset) { 477 expected_offset += s->cluster_size; 478 if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) { 479 break; 480 } 481 } 482 count += ret; 483 /* Stop if there are type changes before the end of the cluster */ 484 if (first_sc + ret < s->subclusters_per_cluster) { 485 break; 486 } 487 } 488 489 return count; 490 } 491 492 static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, 493 uint64_t src_cluster_offset, 494 unsigned offset_in_cluster, 495 QEMUIOVector *qiov) 496 { 497 int ret; 498 499 if (qiov->size == 0) { 500 return 0; 501 } 502 503 BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); 504 505 if (!bs->drv) { 506 return -ENOMEDIUM; 507 } 508 509 /* 510 * We never deal with requests that don't 
satisfy 511 * bdrv_check_qiov_request(), and aligning requests to clusters never 512 * breaks this condition. So, do some assertions before calling 513 * bs->drv->bdrv_co_preadv_part() which has int64_t arguments. 514 */ 515 assert(src_cluster_offset <= INT64_MAX); 516 assert(src_cluster_offset + offset_in_cluster <= INT64_MAX); 517 /* Cast qiov->size to uint64_t to silence a compiler warning on -m32 */ 518 assert((uint64_t)qiov->size <= INT64_MAX); 519 bdrv_check_qiov_request(src_cluster_offset + offset_in_cluster, qiov->size, 520 qiov, 0, &error_abort); 521 /* 522 * Call .bdrv_co_readv() directly instead of using the public block-layer 523 * interface. This avoids double I/O throttling and request tracking, 524 * which can lead to deadlock when block layer copy-on-read is enabled. 525 */ 526 ret = bs->drv->bdrv_co_preadv_part(bs, 527 src_cluster_offset + offset_in_cluster, 528 qiov->size, qiov, 0, 0); 529 if (ret < 0) { 530 return ret; 531 } 532 533 return 0; 534 } 535 536 static int coroutine_fn do_perform_cow_write(BlockDriverState *bs, 537 uint64_t cluster_offset, 538 unsigned offset_in_cluster, 539 QEMUIOVector *qiov) 540 { 541 BDRVQcow2State *s = bs->opaque; 542 int ret; 543 544 if (qiov->size == 0) { 545 return 0; 546 } 547 548 ret = qcow2_pre_write_overlap_check(bs, 0, 549 cluster_offset + offset_in_cluster, qiov->size, true); 550 if (ret < 0) { 551 return ret; 552 } 553 554 BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); 555 ret = bdrv_co_pwritev(s->data_file, cluster_offset + offset_in_cluster, 556 qiov->size, qiov, 0); 557 if (ret < 0) { 558 return ret; 559 } 560 561 return 0; 562 } 563 564 565 /* 566 * get_host_offset 567 * 568 * For a given offset of the virtual disk find the equivalent host 569 * offset in the qcow2 file and store it in *host_offset. Neither 570 * offset needs to be aligned to a cluster boundary. 571 * 572 * If the cluster is unallocated then *host_offset will be 0. 
* If the cluster is compressed then *host_offset will contain the l2 entry.
 *
 * On entry, *bytes is the maximum number of contiguous bytes starting at
 * offset that we are interested in.
 *
 * On exit, *bytes is the number of bytes starting at offset that have the same
 * subcluster type and (if applicable) are stored contiguously in the image
 * file. The subcluster type is stored in *subcluster_type.
 * Compressed clusters are always processed one by one.
 *
 * The result never extends past the end of the L2 slice that holds the
 * entry for @offset.
 *
 * Returns 0 on success, -errno in error cases. On error *bytes and
 * *subcluster_type are not written (*host_offset may already have been).
 */
int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset,
                          unsigned int *bytes, uint64_t *host_offset,
                          QCow2SubclusterType *subcluster_type)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int l2_index, sc_index;
    uint64_t l1_index, l2_offset, *l2_slice, l2_entry, l2_bitmap;
    int sc;
    unsigned int offset_in_cluster;
    uint64_t bytes_available, bytes_needed, nb_clusters;
    QCow2SubclusterType type;
    int ret;

    /*
     * Both byte counters below are measured from the start of the cluster
     * that contains @offset, hence the offset_in_cluster term.
     */
    offset_in_cluster = offset_into_cluster(s, offset);
    bytes_needed = (uint64_t) *bytes + offset_in_cluster;

    /* compute how many bytes there are between the start of the cluster
     * containing offset and the end of the l2 slice that contains
     * the entry pointing to it */
    bytes_available =
        ((uint64_t) (s->l2_slice_size - offset_to_l2_slice_index(s, offset)))
        << s->cluster_bits;

    if (bytes_needed > bytes_available) {
        bytes_needed = bytes_available;
    }

    *host_offset = 0;

    /* seek to the l2 offset in the l1 table */

    /*
     * An offset beyond the L1 table, or an L1 entry without an L2 table,
     * means unallocated; "out" then reports up to bytes_needed bytes.
     */
    l1_index = offset_to_l1_index(s, offset);
    if (l1_index >= s->l1_size) {
        type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN;
        goto out;
    }

    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (!l2_offset) {
        type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN;
        goto out;
    }

    /* No L2 slice is held yet, so errors up to l2_load() return directly */
    if (offset_into_cluster(s, l2_offset)) {
        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
                                " unaligned (L1 index: %#" PRIx64 ")",
                                l2_offset, l1_index);
        return -EIO;
    }

    /* load the l2 slice in memory */

    ret = l2_load(bs, offset, l2_offset, &l2_slice);
    if (ret < 0) {
        return ret;
    }

    /* find the cluster offset for the given disk offset */

    l2_index = offset_to_l2_slice_index(s, offset);
    sc_index = offset_to_sc_index(s, offset);
    l2_entry = get_l2_entry(s, l2_slice, l2_index);
    l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);

    nb_clusters = size_to_clusters(s, bytes_needed);
    /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned
     * integers; the minimum cluster size is 512, so this assertion is always
     * true */
    assert(nb_clusters <= INT_MAX);

    /* From here on the slice is held: errors must leave through "fail" */
    type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);
    if (s->qcow_version < 3 && (type == QCOW2_SUBCLUSTER_ZERO_PLAIN ||
                                type == QCOW2_SUBCLUSTER_ZERO_ALLOC)) {
        qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
                                " in pre-v3 image (L2 offset: %#" PRIx64
                                ", L2 index: %#x)", l2_offset, l2_index);
        ret = -EIO;
        goto fail;
    }
    /* Validate the entry and fill in *host_offset according to its type */
    switch (type) {
    case QCOW2_SUBCLUSTER_INVALID:
        break; /* This is handled by count_contiguous_subclusters() below */
    case QCOW2_SUBCLUSTER_COMPRESSED:
        if (has_data_file(bs)) {
            qcow2_signal_corruption(bs, true, -1, -1, "Compressed cluster "
                                    "entry found in image with external data "
                                    "file (L2 offset: %#" PRIx64 ", L2 index: "
                                    "%#x)", l2_offset, l2_index);
            ret = -EIO;
            goto fail;
        }
        /* For compressed clusters the caller gets the raw L2 entry */
        *host_offset = l2_entry;
        break;
    case QCOW2_SUBCLUSTER_ZERO_PLAIN:
    case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
        /* *host_offset stays 0 */
        break;
    case QCOW2_SUBCLUSTER_ZERO_ALLOC:
    case QCOW2_SUBCLUSTER_NORMAL:
    case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: {
        uint64_t host_cluster_offset = l2_entry & L2E_OFFSET_MASK;
        *host_offset = host_cluster_offset + offset_in_cluster;
        if (offset_into_cluster(s, host_cluster_offset)) {
            qcow2_signal_corruption(bs, true, -1, -1,
                                    "Cluster allocation offset %#"
                                    PRIx64 " unaligned (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)", host_cluster_offset,
                                    l2_offset, l2_index);
            ret = -EIO;
            goto fail;
        }
        /* With an external data file, host and guest offsets must match */
        if (has_data_file(bs) && *host_offset != offset) {
            qcow2_signal_corruption(bs, true, -1, -1,
                                    "External data file host cluster offset %#"
                                    PRIx64 " does not match guest cluster "
                                    "offset: %#" PRIx64
                                    ", L2 index: %#x)", host_cluster_offset,
                                    offset - offset_in_cluster, l2_index);
            ret = -EIO;
            goto fail;
        }
        break;
    }
    default:
        abort();
    }

    /*
     * Count how far the run of same-type subclusters extends. On failure
     * l2_index has been advanced to the offending entry, which is what the
     * corruption message reports.
     */
    sc = count_contiguous_subclusters(bs, nb_clusters, sc_index,
                                      l2_slice, &l2_index);
    if (sc < 0) {
        qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster entry found "
                                " (L2 offset: %#" PRIx64 ", L2 index: %#x)",
                                l2_offset, l2_index);
        ret = -EIO;
        goto fail;
    }
    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    /*
     * sc is counted from sc_index; adding sc_index makes the byte count
     * relative to the start of the cluster, like bytes_needed.
     */
    bytes_available = ((int64_t)sc + sc_index) << s->subcluster_bits;

out:
    /* Success exit; no L2 slice is held at this point */
    if (bytes_available > bytes_needed) {
        bytes_available = bytes_needed;
    }

    /* bytes_available <= bytes_needed <= *bytes + offset_in_cluster;
     * subtracting offset_in_cluster will therefore definitely yield something
     * not exceeding UINT_MAX */
    assert(bytes_available - offset_in_cluster <= UINT_MAX);
    *bytes = bytes_available - offset_in_cluster;

    *subcluster_type = type;

    return 0;

fail:
    /* Error exit for paths taken after l2_load(): release the slice */
    qcow2_cache_put(s->l2_table_cache, (void **)&l2_slice);
    return ret;
}

/*
 * get_cluster_table
 *
 * for a given disk offset, load (and allocate if needed)
 * the appropriate slice of its l2 table.
 *
 * the cluster index in the l2 slice is given to the caller.
751 * 752 * Returns 0 on success, -errno in failure case 753 */ 754 static int get_cluster_table(BlockDriverState *bs, uint64_t offset, 755 uint64_t **new_l2_slice, 756 int *new_l2_index) 757 { 758 BDRVQcow2State *s = bs->opaque; 759 unsigned int l2_index; 760 uint64_t l1_index, l2_offset; 761 uint64_t *l2_slice = NULL; 762 int ret; 763 764 /* seek to the l2 offset in the l1 table */ 765 766 l1_index = offset_to_l1_index(s, offset); 767 if (l1_index >= s->l1_size) { 768 ret = qcow2_grow_l1_table(bs, l1_index + 1, false); 769 if (ret < 0) { 770 return ret; 771 } 772 } 773 774 assert(l1_index < s->l1_size); 775 l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; 776 if (offset_into_cluster(s, l2_offset)) { 777 qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64 778 " unaligned (L1 index: %#" PRIx64 ")", 779 l2_offset, l1_index); 780 return -EIO; 781 } 782 783 if (!(s->l1_table[l1_index] & QCOW_OFLAG_COPIED)) { 784 /* First allocate a new L2 table (and do COW if needed) */ 785 ret = l2_allocate(bs, l1_index); 786 if (ret < 0) { 787 return ret; 788 } 789 790 /* Then decrease the refcount of the old table */ 791 if (l2_offset) { 792 qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s), 793 QCOW2_DISCARD_OTHER); 794 } 795 796 /* Get the offset of the newly-allocated l2 table */ 797 l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; 798 assert(offset_into_cluster(s, l2_offset) == 0); 799 } 800 801 /* load the l2 slice in memory */ 802 ret = l2_load(bs, offset, l2_offset, &l2_slice); 803 if (ret < 0) { 804 return ret; 805 } 806 807 /* find the cluster offset for the given disk offset */ 808 809 l2_index = offset_to_l2_slice_index(s, offset); 810 811 *new_l2_slice = l2_slice; 812 *new_l2_index = l2_index; 813 814 return 0; 815 } 816 817 /* 818 * alloc_compressed_cluster_offset 819 * 820 * For a given offset on the virtual disk, allocate a new compressed cluster 821 * and put the host offset of the cluster into *host_offset. 
* If a cluster is already allocated at the offset, return an error.
 *
 * @compressed_size is the number of bytes to allocate for the compressed
 * data.
 *
 * Return 0 on success and -errno in error cases
 */
int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                          uint64_t offset,
                                          int compressed_size,
                                          uint64_t *host_offset)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index, ret;
    uint64_t *l2_slice;
    int64_t cluster_offset;
    int nb_csectors;

    /*
     * NOTE(review): this returns success without writing *host_offset when
     * the image uses an external data file - presumably callers reject
     * compressed writes for such images beforehand; confirm.
     */
    if (has_data_file(bs)) {
        return 0;
    }

    ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
    if (ret < 0) {
        return ret;
    }

    /* Compression can't overwrite anything. Fail if the cluster was already
     * allocated. */
    cluster_offset = get_l2_entry(s, l2_slice, l2_index);
    if (cluster_offset & L2E_OFFSET_MASK) {
        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
        return -EIO;
    }

    cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
    if (cluster_offset < 0) {
        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
        return cluster_offset;
    }

    /*
     * Index of the sector holding the last compressed byte minus the index
     * of the sector holding the first one (i.e. the number of additional
     * sectors beyond the first).
     */
    nb_csectors =
        (cluster_offset + compressed_size - 1) / QCOW2_COMPRESSED_SECTOR_SIZE -
        (cluster_offset / QCOW2_COMPRESSED_SECTOR_SIZE);

    /* The offset and size must fit in their fields of the L2 table entry */
    assert((cluster_offset & s->cluster_offset_mask) == cluster_offset);
    assert((nb_csectors & s->csize_mask) == nb_csectors);

    /* From here on cluster_offset holds the complete L2 entry value */
    cluster_offset |= QCOW_OFLAG_COMPRESSED |
                      ((uint64_t)nb_csectors << s->csize_shift);

    /* update L2 table */

    /* compressed clusters never have the copied flag */

    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
    set_l2_entry(s, l2_slice, l2_index, cluster_offset);
    if (has_subclusters(s)) {
        set_l2_bitmap(s, l2_slice, l2_index, 0);
    }
    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    /* Strip the flag and size bits again to report the plain host offset */
    *host_offset = cluster_offset & s->cluster_offset_mask;
    return 0;
}

/*
 * perform_cow
 *
 * Perform the copy-on-write described by @m: read the data of the
 * m->cow_start and m->cow_end regions (offsets relative to m->offset),
 * encrypt it if the image is encrypted, and write it at the same relative
 * offsets from m->alloc_offset. If m->data_qiov is set, the guest data
 * between the two regions is written in the same operation.
 *
 * s->lock is released while the I/O is in progress and taken again before
 * returning; this holds for both the success and the error path once the
 * lock has been dropped.
 *
 * Returns 0 on success (also when there is nothing to copy or m->skip_cow
 * is set), -ENOMEM if the bounce buffer cannot be allocated, or the -errno
 * of the failing read/encrypt/write step.
 */
static int coroutine_fn perform_cow(BlockDriverState *bs, QCowL2Meta *m)
{
    BDRVQcow2State *s = bs->opaque;
    Qcow2COWRegion *start = &m->cow_start;
    Qcow2COWRegion *end = &m->cow_end;
    unsigned buffer_size;
    /* Size of the gap between the end of cow_start and the start of cow_end */
    unsigned data_bytes = end->offset - (start->offset + start->nb_bytes);
    bool merge_reads;
    uint8_t *start_buffer, *end_buffer;
    QEMUIOVector qiov;
    int ret;

    /* Make sure the buffer size computations below cannot overflow */
    assert(start->nb_bytes <= UINT_MAX - end->nb_bytes);
    assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes);
    assert(start->offset + start->nb_bytes <= end->offset);

    if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) {
        return 0;
    }

    /* If we have to read both the start and end COW regions and the
     * middle region is not too large then perform just one read
     * operation */
    merge_reads = start->nb_bytes && end->nb_bytes && data_bytes <= 16384;
    if (merge_reads) {
        buffer_size = start->nb_bytes + data_bytes + end->nb_bytes;
    } else {
        /* If we have to do two reads, add some padding in the middle
         * if necessary to make sure that the end region is optimally
         * aligned. */
        size_t align = bdrv_opt_mem_align(bs);
        assert(align > 0 && align <= UINT_MAX);
        assert(QEMU_ALIGN_UP(start->nb_bytes, align) <=
               UINT_MAX - end->nb_bytes);
        buffer_size = QEMU_ALIGN_UP(start->nb_bytes, align) + end->nb_bytes;
    }

    /* Reserve a buffer large enough to store all the data that we're
     * going to read */
    start_buffer = qemu_try_blockalign(bs, buffer_size);
    if (start_buffer == NULL) {
        return -ENOMEM;
    }
    /* The part of the buffer where the end region is located */
    end_buffer = start_buffer + buffer_size - end->nb_bytes;

    /*
     * Two elements for the COW regions plus, if guest data is attached,
     * as many as the relevant part of m->data_qiov needs.
     */
    qemu_iovec_init(&qiov, 2 + (m->data_qiov ?
                                qemu_iovec_subvec_niov(m->data_qiov,
                                                       m->data_qiov_offset,
                                                       data_bytes)
                                : 0));

    /* Everything between here and the "fail" label runs without s->lock */
    qemu_co_mutex_unlock(&s->lock);
    /* First we read the existing data from both COW regions. We
     * either read the whole region in one go, or the start and end
     * regions separately. */
    if (merge_reads) {
        qemu_iovec_add(&qiov, start_buffer, buffer_size);
        ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
    } else {
        qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
        ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
        if (ret < 0) {
            goto fail;
        }

        qemu_iovec_reset(&qiov);
        qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
        ret = do_perform_cow_read(bs, m->offset, end->offset, &qiov);
    }
    if (ret < 0) {
        goto fail;
    }

    /* Encrypt the data if necessary before writing it */
    if (bs->encrypted) {
        ret = qcow2_co_encrypt(bs,
                               m->alloc_offset + start->offset,
                               m->offset + start->offset,
                               start_buffer, start->nb_bytes);
        if (ret < 0) {
            goto fail;
        }

        ret = qcow2_co_encrypt(bs,
                               m->alloc_offset + end->offset,
                               m->offset + end->offset,
                               end_buffer, end->nb_bytes);
        if (ret < 0) {
            goto fail;
        }
    }

    /* And now we can write everything. If we have the guest data we
     * can write everything in one single operation */
    if (m->data_qiov) {
        /* Assemble: [start region] + guest data + [end region] */
        qemu_iovec_reset(&qiov);
        if (start->nb_bytes) {
            qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
        }
        qemu_iovec_concat(&qiov, m->data_qiov, m->data_qiov_offset, data_bytes);
        if (end->nb_bytes) {
            qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
        }
        /* NOTE: we have a write_aio blkdebug event here followed by
         * a cow_write one in do_perform_cow_write(), but there's only
         * one single I/O operation */
        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
        ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
    } else {
        /* If there's no guest data then write both COW regions separately */
        qemu_iovec_reset(&qiov);
        qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
        ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
        if (ret < 0) {
            goto fail;
        }

        qemu_iovec_reset(&qiov);
        qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
        ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov);
    }

fail:
    /* Reached on success as well: common cleanup with the lock re-taken */
    qemu_co_mutex_lock(&s->lock);

    /*
     * Before we update the L2 table to actually point to the new cluster, we
     * need to be sure that the refcounts have been increased and COW was
     * handled.
     */
    if (ret == 0) {
        qcow2_cache_depends_on_flush(s->l2_table_cache);
    }

    qemu_vfree(start_buffer);
    qemu_iovec_destroy(&qiov);
    return ret;
}

/*
 * qcow2_alloc_cluster_link_l2
 *
 * Finish the allocation described by @m: perform the pending COW, then
 * make the m->nb_clusters L2 entries starting at guest offset m->offset
 * point to the consecutive host clusters starting at m->alloc_offset
 * (with QCOW_OFLAG_COPIED set). For images with subclusters the allocation
 * bitmap is updated as well, unless m->prealloc is set.
 *
 * Non-zero L2 entries that get replaced are freed afterwards unless
 * m->keep_old_clusters is set.
 *
 * Returns 0 on success, -errno on failure.
 */
int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
                                             QCowL2Meta *m)
{
    BDRVQcow2State *s = bs->opaque;
    /* j counts the entries collected in old_cluster[] */
    int i, j = 0, l2_index, ret;
    uint64_t *old_cluster, *l2_slice;
    uint64_t cluster_offset = m->alloc_offset;

    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
    assert(m->nb_clusters > 0);

    /* Room for one replaced L2 entry per cluster */
    old_cluster = g_try_new(uint64_t, m->nb_clusters);
    if (old_cluster == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    /* copy content of unmodified sectors */
    ret = perform_cow(bs, m);
    if (ret < 0) {
        goto err;
    }

    /* Update L2 table. */
    if (s->use_lazy_refcounts) {
        qcow2_mark_dirty(bs);
    }
    if (qcow2_need_accurate_refcounts(s)) {
        qcow2_cache_set_dependency(bs, s->l2_table_cache,
                                   s->refcount_block_cache);
    }

    ret = get_cluster_table(bs, m->offset, &l2_slice, &l2_index);
    if (ret < 0) {
        goto err;
    }
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);

    /* All entries must be in this slice, and the COW end within the range */
    assert(l2_index + m->nb_clusters <= s->l2_slice_size);
    assert(m->cow_end.offset + m->cow_end.nb_bytes <=
           m->nb_clusters << s->cluster_bits);
    for (i = 0; i < m->nb_clusters; i++) {
        uint64_t offset = cluster_offset + ((uint64_t)i << s->cluster_bits);
        /* if two concurrent writes happen to the same unallocated cluster
         * each write allocates separate cluster and writes data concurrently.
         * The first one to complete updates l2 table with pointer to its
         * cluster the second one has to do RMW (which is done above by
         * perform_cow()), update l2 table with its cluster pointer and free
         * old cluster. This is what this loop does */
        if (get_l2_entry(s, l2_slice, l2_index + i) != 0) {
            old_cluster[j++] = get_l2_entry(s, l2_slice, l2_index + i);
        }

        /* The offset must fit in the offset field of the L2 table entry */
        assert((offset & L2E_OFFSET_MASK) == offset);

        set_l2_entry(s, l2_slice, l2_index + i, offset | QCOW_OFLAG_COPIED);

        /* Update bitmap with the subclusters that were just written */
        if (has_subclusters(s) && !m->prealloc) {
            uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
            /* Written byte range, relative to m->offset */
            unsigned written_from = m->cow_start.offset;
            unsigned written_to = m->cow_end.offset + m->cow_end.nb_bytes;
            int first_sc, last_sc;
            /* Narrow written_from and written_to down to the current cluster */
            written_from = MAX(written_from, i << s->cluster_bits);
            written_to = MIN(written_to, (i + 1) << s->cluster_bits);
            assert(written_from < written_to);
            first_sc = offset_to_sc_index(s, written_from);
            last_sc = offset_to_sc_index(s, written_to - 1);
            /* Mark [first_sc, last_sc] allocated and clear its zero bits */
            l2_bitmap |= QCOW_OFLAG_SUB_ALLOC_RANGE(first_sc, last_sc + 1);
            l2_bitmap &= ~QCOW_OFLAG_SUB_ZERO_RANGE(first_sc, last_sc + 1);
            set_l2_bitmap(s, l2_slice, l2_index + i, l2_bitmap);
        }
    }


    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    /*
     * If this was a COW, we need to decrease the refcount of the old cluster.
     *
     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
     * clusters), the next write will reuse them anyway.
     */
    if (!m->keep_old_clusters && j != 0) {
        for (i = 0; i < j; i++) {
            qcow2_free_any_cluster(bs, old_cluster[i], QCOW2_DISCARD_NEVER);
        }
    }

    ret = 0;
err:
    /* Also reached on success; old_cluster may be NULL here */
    g_free(old_cluster);
    return ret;
}

/**
 * Frees the allocated clusters because the request failed and they won't
 * actually be linked.
1127 */ 1128 void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) 1129 { 1130 BDRVQcow2State *s = bs->opaque; 1131 if (!has_data_file(bs) && !m->keep_old_clusters) { 1132 qcow2_free_clusters(bs, m->alloc_offset, 1133 m->nb_clusters << s->cluster_bits, 1134 QCOW2_DISCARD_NEVER); 1135 } 1136 } 1137 1138 /* 1139 * For a given write request, create a new QCowL2Meta structure, add 1140 * it to @m and the BDRVQcow2State.cluster_allocs list. If the write 1141 * request does not need copy-on-write or changes to the L2 metadata 1142 * then this function does nothing. 1143 * 1144 * @host_cluster_offset points to the beginning of the first cluster. 1145 * 1146 * @guest_offset and @bytes indicate the offset and length of the 1147 * request. 1148 * 1149 * @l2_slice contains the L2 entries of all clusters involved in this 1150 * write request. 1151 * 1152 * If @keep_old is true it means that the clusters were already 1153 * allocated and will be overwritten. If false then the clusters are 1154 * new and we have to decrease the reference count of the old ones. 1155 * 1156 * Returns 0 on success, -errno on failure. 
1157 */ 1158 static int calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset, 1159 uint64_t guest_offset, unsigned bytes, 1160 uint64_t *l2_slice, QCowL2Meta **m, bool keep_old) 1161 { 1162 BDRVQcow2State *s = bs->opaque; 1163 int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset); 1164 uint64_t l2_entry, l2_bitmap; 1165 unsigned cow_start_from, cow_end_to; 1166 unsigned cow_start_to = offset_into_cluster(s, guest_offset); 1167 unsigned cow_end_from = cow_start_to + bytes; 1168 unsigned nb_clusters = size_to_clusters(s, cow_end_from); 1169 QCowL2Meta *old_m = *m; 1170 QCow2SubclusterType type; 1171 int i; 1172 bool skip_cow = keep_old; 1173 1174 assert(nb_clusters <= s->l2_slice_size - l2_index); 1175 1176 /* Check the type of all affected subclusters */ 1177 for (i = 0; i < nb_clusters; i++) { 1178 l2_entry = get_l2_entry(s, l2_slice, l2_index + i); 1179 l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); 1180 if (skip_cow) { 1181 unsigned write_from = MAX(cow_start_to, i << s->cluster_bits); 1182 unsigned write_to = MIN(cow_end_from, (i + 1) << s->cluster_bits); 1183 int first_sc = offset_to_sc_index(s, write_from); 1184 int last_sc = offset_to_sc_index(s, write_to - 1); 1185 int cnt = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap, 1186 first_sc, &type); 1187 /* Is any of the subclusters of type != QCOW2_SUBCLUSTER_NORMAL ? 
*/ 1188 if (type != QCOW2_SUBCLUSTER_NORMAL || first_sc + cnt <= last_sc) { 1189 skip_cow = false; 1190 } 1191 } else { 1192 /* If we can't skip the cow we can still look for invalid entries */ 1193 type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, 0); 1194 } 1195 if (type == QCOW2_SUBCLUSTER_INVALID) { 1196 int l1_index = offset_to_l1_index(s, guest_offset); 1197 uint64_t l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; 1198 qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster " 1199 "entry found (L2 offset: %#" PRIx64 1200 ", L2 index: %#x)", 1201 l2_offset, l2_index + i); 1202 return -EIO; 1203 } 1204 } 1205 1206 if (skip_cow) { 1207 return 0; 1208 } 1209 1210 /* Get the L2 entry of the first cluster */ 1211 l2_entry = get_l2_entry(s, l2_slice, l2_index); 1212 l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); 1213 sc_index = offset_to_sc_index(s, guest_offset); 1214 type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index); 1215 1216 if (!keep_old) { 1217 switch (type) { 1218 case QCOW2_SUBCLUSTER_COMPRESSED: 1219 cow_start_from = 0; 1220 break; 1221 case QCOW2_SUBCLUSTER_NORMAL: 1222 case QCOW2_SUBCLUSTER_ZERO_ALLOC: 1223 case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 1224 if (has_subclusters(s)) { 1225 /* Skip all leading zero and unallocated subclusters */ 1226 uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC; 1227 cow_start_from = 1228 MIN(sc_index, ctz32(alloc_bitmap)) << s->subcluster_bits; 1229 } else { 1230 cow_start_from = 0; 1231 } 1232 break; 1233 case QCOW2_SUBCLUSTER_ZERO_PLAIN: 1234 case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: 1235 cow_start_from = sc_index << s->subcluster_bits; 1236 break; 1237 default: 1238 g_assert_not_reached(); 1239 } 1240 } else { 1241 switch (type) { 1242 case QCOW2_SUBCLUSTER_NORMAL: 1243 cow_start_from = cow_start_to; 1244 break; 1245 case QCOW2_SUBCLUSTER_ZERO_ALLOC: 1246 case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 1247 cow_start_from = sc_index << s->subcluster_bits; 1248 break; 1249 default: 
1250 g_assert_not_reached(); 1251 } 1252 } 1253 1254 /* Get the L2 entry of the last cluster */ 1255 l2_index += nb_clusters - 1; 1256 l2_entry = get_l2_entry(s, l2_slice, l2_index); 1257 l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); 1258 sc_index = offset_to_sc_index(s, guest_offset + bytes - 1); 1259 type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index); 1260 1261 if (!keep_old) { 1262 switch (type) { 1263 case QCOW2_SUBCLUSTER_COMPRESSED: 1264 cow_end_to = ROUND_UP(cow_end_from, s->cluster_size); 1265 break; 1266 case QCOW2_SUBCLUSTER_NORMAL: 1267 case QCOW2_SUBCLUSTER_ZERO_ALLOC: 1268 case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 1269 cow_end_to = ROUND_UP(cow_end_from, s->cluster_size); 1270 if (has_subclusters(s)) { 1271 /* Skip all trailing zero and unallocated subclusters */ 1272 uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC; 1273 cow_end_to -= 1274 MIN(s->subclusters_per_cluster - sc_index - 1, 1275 clz32(alloc_bitmap)) << s->subcluster_bits; 1276 } 1277 break; 1278 case QCOW2_SUBCLUSTER_ZERO_PLAIN: 1279 case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: 1280 cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size); 1281 break; 1282 default: 1283 g_assert_not_reached(); 1284 } 1285 } else { 1286 switch (type) { 1287 case QCOW2_SUBCLUSTER_NORMAL: 1288 cow_end_to = cow_end_from; 1289 break; 1290 case QCOW2_SUBCLUSTER_ZERO_ALLOC: 1291 case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 1292 cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size); 1293 break; 1294 default: 1295 g_assert_not_reached(); 1296 } 1297 } 1298 1299 *m = g_malloc0(sizeof(**m)); 1300 **m = (QCowL2Meta) { 1301 .next = old_m, 1302 1303 .alloc_offset = host_cluster_offset, 1304 .offset = start_of_cluster(s, guest_offset), 1305 .nb_clusters = nb_clusters, 1306 1307 .keep_old_clusters = keep_old, 1308 1309 .cow_start = { 1310 .offset = cow_start_from, 1311 .nb_bytes = cow_start_to - cow_start_from, 1312 }, 1313 .cow_end = { 1314 .offset = cow_end_from, 1315 .nb_bytes = cow_end_to - 
cow_end_from, 1316 }, 1317 }; 1318 1319 qemu_co_queue_init(&(*m)->dependent_requests); 1320 QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); 1321 1322 return 0; 1323 } 1324 1325 /* 1326 * Returns true if writing to the cluster pointed to by @l2_entry 1327 * requires a new allocation (that is, if the cluster is unallocated 1328 * or has refcount > 1 and therefore cannot be written in-place). 1329 */ 1330 static bool cluster_needs_new_alloc(BlockDriverState *bs, uint64_t l2_entry) 1331 { 1332 switch (qcow2_get_cluster_type(bs, l2_entry)) { 1333 case QCOW2_CLUSTER_NORMAL: 1334 case QCOW2_CLUSTER_ZERO_ALLOC: 1335 if (l2_entry & QCOW_OFLAG_COPIED) { 1336 return false; 1337 } 1338 /* fallthrough */ 1339 case QCOW2_CLUSTER_UNALLOCATED: 1340 case QCOW2_CLUSTER_COMPRESSED: 1341 case QCOW2_CLUSTER_ZERO_PLAIN: 1342 return true; 1343 default: 1344 abort(); 1345 } 1346 } 1347 1348 /* 1349 * Returns the number of contiguous clusters that can be written to 1350 * using one single write request, starting from @l2_index. 1351 * At most @nb_clusters are checked. 1352 * 1353 * If @new_alloc is true this counts clusters that are either 1354 * unallocated, or allocated but with refcount > 1 (so they need to be 1355 * newly allocated and COWed). 1356 * 1357 * If @new_alloc is false this counts clusters that are already 1358 * allocated and can be overwritten in-place (this includes clusters 1359 * of type QCOW2_CLUSTER_ZERO_ALLOC). 
1360 */ 1361 static int count_single_write_clusters(BlockDriverState *bs, int nb_clusters, 1362 uint64_t *l2_slice, int l2_index, 1363 bool new_alloc) 1364 { 1365 BDRVQcow2State *s = bs->opaque; 1366 uint64_t l2_entry = get_l2_entry(s, l2_slice, l2_index); 1367 uint64_t expected_offset = l2_entry & L2E_OFFSET_MASK; 1368 int i; 1369 1370 for (i = 0; i < nb_clusters; i++) { 1371 l2_entry = get_l2_entry(s, l2_slice, l2_index + i); 1372 if (cluster_needs_new_alloc(bs, l2_entry) != new_alloc) { 1373 break; 1374 } 1375 if (!new_alloc) { 1376 if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) { 1377 break; 1378 } 1379 expected_offset += s->cluster_size; 1380 } 1381 } 1382 1383 assert(i <= nb_clusters); 1384 return i; 1385 } 1386 1387 /* 1388 * Check if there already is an AIO write request in flight which allocates 1389 * the same cluster. In this case we need to wait until the previous 1390 * request has completed and updated the L2 table accordingly. 1391 * 1392 * Returns: 1393 * 0 if there was no dependency. *cur_bytes indicates the number of 1394 * bytes from guest_offset that can be read before the next 1395 * dependency must be processed (or the request is complete) 1396 * 1397 * -EAGAIN if we had to wait for another request, previously gathered 1398 * information on cluster allocation may be invalid now. The caller 1399 * must start over anyway, so consider *cur_bytes undefined. 
1400 */ 1401 static int coroutine_fn handle_dependencies(BlockDriverState *bs, 1402 uint64_t guest_offset, 1403 uint64_t *cur_bytes, QCowL2Meta **m) 1404 { 1405 BDRVQcow2State *s = bs->opaque; 1406 QCowL2Meta *old_alloc; 1407 uint64_t bytes = *cur_bytes; 1408 1409 QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { 1410 1411 uint64_t start = guest_offset; 1412 uint64_t end = start + bytes; 1413 uint64_t old_start = start_of_cluster(s, l2meta_cow_start(old_alloc)); 1414 uint64_t old_end = ROUND_UP(l2meta_cow_end(old_alloc), s->cluster_size); 1415 1416 if (end <= old_start || start >= old_end) { 1417 /* No intersection */ 1418 continue; 1419 } 1420 1421 if (old_alloc->keep_old_clusters && 1422 (end <= l2meta_cow_start(old_alloc) || 1423 start >= l2meta_cow_end(old_alloc))) 1424 { 1425 /* 1426 * Clusters intersect but COW areas don't. And cluster itself is 1427 * already allocated. So, there is no actual conflict. 1428 */ 1429 continue; 1430 } 1431 1432 /* Conflict */ 1433 1434 if (start < old_start) { 1435 /* Stop at the start of a running allocation */ 1436 bytes = old_start - start; 1437 } else { 1438 bytes = 0; 1439 } 1440 1441 /* 1442 * Stop if an l2meta already exists. After yielding, it wouldn't 1443 * be valid any more, so we'd have to clean up the old L2Metas 1444 * and deal with requests depending on them before starting to 1445 * gather new ones. Not worth the trouble. 1446 */ 1447 if (bytes == 0 && *m) { 1448 *cur_bytes = 0; 1449 return 0; 1450 } 1451 1452 if (bytes == 0) { 1453 /* 1454 * Wait for the dependency to complete. We need to recheck 1455 * the free/allocated clusters when we continue. 
1456 */ 1457 qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock); 1458 return -EAGAIN; 1459 } 1460 } 1461 1462 /* Make sure that existing clusters and new allocations are only used up to 1463 * the next dependency if we shortened the request above */ 1464 *cur_bytes = bytes; 1465 1466 return 0; 1467 } 1468 1469 /* 1470 * Checks how many already allocated clusters that don't require a new 1471 * allocation there are at the given guest_offset (up to *bytes). 1472 * If *host_offset is not INV_OFFSET, only physically contiguous clusters 1473 * beginning at this host offset are counted. 1474 * 1475 * Note that guest_offset may not be cluster aligned. In this case, the 1476 * returned *host_offset points to exact byte referenced by guest_offset and 1477 * therefore isn't cluster aligned as well. 1478 * 1479 * Returns: 1480 * 0: if no allocated clusters are available at the given offset. 1481 * *bytes is normally unchanged. It is set to 0 if the cluster 1482 * is allocated and can be overwritten in-place but doesn't have 1483 * the right physical offset. 1484 * 1485 * 1: if allocated clusters that can be overwritten in place are 1486 * available at the requested offset. *bytes may have decreased 1487 * and describes the length of the area that can be written to. 1488 * 1489 * -errno: in error cases 1490 */ 1491 static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, 1492 uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) 1493 { 1494 BDRVQcow2State *s = bs->opaque; 1495 int l2_index; 1496 uint64_t l2_entry, cluster_offset; 1497 uint64_t *l2_slice; 1498 uint64_t nb_clusters; 1499 unsigned int keep_clusters; 1500 int ret; 1501 1502 trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset, 1503 *bytes); 1504 1505 assert(*host_offset == INV_OFFSET || offset_into_cluster(s, guest_offset) 1506 == offset_into_cluster(s, *host_offset)); 1507 1508 /* 1509 * Calculate the number of clusters to look for. 
We stop at L2 slice 1510 * boundaries to keep things simple. 1511 */ 1512 nb_clusters = 1513 size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); 1514 1515 l2_index = offset_to_l2_slice_index(s, guest_offset); 1516 nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); 1517 /* Limit total byte count to BDRV_REQUEST_MAX_BYTES */ 1518 nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits); 1519 1520 /* Find L2 entry for the first involved cluster */ 1521 ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index); 1522 if (ret < 0) { 1523 return ret; 1524 } 1525 1526 l2_entry = get_l2_entry(s, l2_slice, l2_index); 1527 cluster_offset = l2_entry & L2E_OFFSET_MASK; 1528 1529 if (!cluster_needs_new_alloc(bs, l2_entry)) { 1530 if (offset_into_cluster(s, cluster_offset)) { 1531 qcow2_signal_corruption(bs, true, -1, -1, "%s cluster offset " 1532 "%#" PRIx64 " unaligned (guest offset: %#" 1533 PRIx64 ")", l2_entry & QCOW_OFLAG_ZERO ? 1534 "Preallocated zero" : "Data", 1535 cluster_offset, guest_offset); 1536 ret = -EIO; 1537 goto out; 1538 } 1539 1540 /* If a specific host_offset is required, check it */ 1541 if (*host_offset != INV_OFFSET && cluster_offset != *host_offset) { 1542 *bytes = 0; 1543 ret = 0; 1544 goto out; 1545 } 1546 1547 /* We keep all QCOW_OFLAG_COPIED clusters */ 1548 keep_clusters = count_single_write_clusters(bs, nb_clusters, l2_slice, 1549 l2_index, false); 1550 assert(keep_clusters <= nb_clusters); 1551 1552 *bytes = MIN(*bytes, 1553 keep_clusters * s->cluster_size 1554 - offset_into_cluster(s, guest_offset)); 1555 assert(*bytes != 0); 1556 1557 ret = calculate_l2_meta(bs, cluster_offset, guest_offset, 1558 *bytes, l2_slice, m, true); 1559 if (ret < 0) { 1560 goto out; 1561 } 1562 1563 ret = 1; 1564 } else { 1565 ret = 0; 1566 } 1567 1568 /* Cleanup */ 1569 out: 1570 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 1571 1572 /* Only return a host offset if we actually made progress. 
Otherwise we 1573 * would make requirements for handle_alloc() that it can't fulfill */ 1574 if (ret > 0) { 1575 *host_offset = cluster_offset + offset_into_cluster(s, guest_offset); 1576 } 1577 1578 return ret; 1579 } 1580 1581 /* 1582 * Allocates new clusters for the given guest_offset. 1583 * 1584 * At most *nb_clusters are allocated, and on return *nb_clusters is updated to 1585 * contain the number of clusters that have been allocated and are contiguous 1586 * in the image file. 1587 * 1588 * If *host_offset is not INV_OFFSET, it specifies the offset in the image file 1589 * at which the new clusters must start. *nb_clusters can be 0 on return in 1590 * this case if the cluster at host_offset is already in use. If *host_offset 1591 * is INV_OFFSET, the clusters can be allocated anywhere in the image file. 1592 * 1593 * *host_offset is updated to contain the offset into the image file at which 1594 * the first allocated cluster starts. 1595 * 1596 * Return 0 on success and -errno in error cases. -EAGAIN means that the 1597 * function has been waiting for another request and the allocation must be 1598 * restarted, but the whole request should not be failed. 
1599 */ 1600 static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, 1601 uint64_t *host_offset, uint64_t *nb_clusters) 1602 { 1603 BDRVQcow2State *s = bs->opaque; 1604 1605 trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, 1606 *host_offset, *nb_clusters); 1607 1608 if (has_data_file(bs)) { 1609 assert(*host_offset == INV_OFFSET || 1610 *host_offset == start_of_cluster(s, guest_offset)); 1611 *host_offset = start_of_cluster(s, guest_offset); 1612 return 0; 1613 } 1614 1615 /* Allocate new clusters */ 1616 trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); 1617 if (*host_offset == INV_OFFSET) { 1618 int64_t cluster_offset = 1619 qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size); 1620 if (cluster_offset < 0) { 1621 return cluster_offset; 1622 } 1623 *host_offset = cluster_offset; 1624 return 0; 1625 } else { 1626 int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); 1627 if (ret < 0) { 1628 return ret; 1629 } 1630 *nb_clusters = ret; 1631 return 0; 1632 } 1633 } 1634 1635 /* 1636 * Allocates new clusters for an area that is either still unallocated or 1637 * cannot be overwritten in-place. If *host_offset is not INV_OFFSET, 1638 * clusters are only allocated if the new allocation can match the specified 1639 * host offset. 1640 * 1641 * Note that guest_offset may not be cluster aligned. In this case, the 1642 * returned *host_offset points to exact byte referenced by guest_offset and 1643 * therefore isn't cluster aligned as well. 1644 * 1645 * Returns: 1646 * 0: if no clusters could be allocated. *bytes is set to 0, 1647 * *host_offset is left unchanged. 1648 * 1649 * 1: if new clusters were allocated. *bytes may be decreased if the 1650 * new allocation doesn't cover all of the requested area. 1651 * *host_offset is updated to contain the host offset of the first 1652 * newly allocated cluster. 
1653 * 1654 * -errno: in error cases 1655 */ 1656 static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, 1657 uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) 1658 { 1659 BDRVQcow2State *s = bs->opaque; 1660 int l2_index; 1661 uint64_t *l2_slice; 1662 uint64_t nb_clusters; 1663 int ret; 1664 1665 uint64_t alloc_cluster_offset; 1666 1667 trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset, 1668 *bytes); 1669 assert(*bytes > 0); 1670 1671 /* 1672 * Calculate the number of clusters to look for. We stop at L2 slice 1673 * boundaries to keep things simple. 1674 */ 1675 nb_clusters = 1676 size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); 1677 1678 l2_index = offset_to_l2_slice_index(s, guest_offset); 1679 nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); 1680 /* Limit total allocation byte count to BDRV_REQUEST_MAX_BYTES */ 1681 nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits); 1682 1683 /* Find L2 entry for the first involved cluster */ 1684 ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index); 1685 if (ret < 0) { 1686 return ret; 1687 } 1688 1689 nb_clusters = count_single_write_clusters(bs, nb_clusters, 1690 l2_slice, l2_index, true); 1691 1692 /* This function is only called when there were no non-COW clusters, so if 1693 * we can't find any unallocated or COW clusters either, something is 1694 * wrong with our code. */ 1695 assert(nb_clusters > 0); 1696 1697 /* Allocate at a given offset in the image file */ 1698 alloc_cluster_offset = *host_offset == INV_OFFSET ? 
INV_OFFSET : 1699 start_of_cluster(s, *host_offset); 1700 ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, 1701 &nb_clusters); 1702 if (ret < 0) { 1703 goto out; 1704 } 1705 1706 /* Can't extend contiguous allocation */ 1707 if (nb_clusters == 0) { 1708 *bytes = 0; 1709 ret = 0; 1710 goto out; 1711 } 1712 1713 assert(alloc_cluster_offset != INV_OFFSET); 1714 1715 /* 1716 * Save info needed for meta data update. 1717 * 1718 * requested_bytes: Number of bytes from the start of the first 1719 * newly allocated cluster to the end of the (possibly shortened 1720 * before) write request. 1721 * 1722 * avail_bytes: Number of bytes from the start of the first 1723 * newly allocated to the end of the last newly allocated cluster. 1724 * 1725 * nb_bytes: The number of bytes from the start of the first 1726 * newly allocated cluster to the end of the area that the write 1727 * request actually writes to (excluding COW at the end) 1728 */ 1729 uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset); 1730 int avail_bytes = nb_clusters << s->cluster_bits; 1731 int nb_bytes = MIN(requested_bytes, avail_bytes); 1732 1733 *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset); 1734 *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset)); 1735 assert(*bytes != 0); 1736 1737 ret = calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes, 1738 l2_slice, m, false); 1739 if (ret < 0) { 1740 goto out; 1741 } 1742 1743 ret = 1; 1744 1745 out: 1746 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 1747 return ret; 1748 } 1749 1750 /* 1751 * For a given area on the virtual disk defined by @offset and @bytes, 1752 * find the corresponding area on the qcow2 image, allocating new 1753 * clusters (or subclusters) if necessary. The result can span a 1754 * combination of allocated and previously unallocated clusters. 1755 * 1756 * Note that offset may not be cluster aligned. 
In this case, the returned 1757 * *host_offset points to exact byte referenced by offset and therefore 1758 * isn't cluster aligned as well. 1759 * 1760 * On return, @host_offset is set to the beginning of the requested 1761 * area. This area is guaranteed to be contiguous on the qcow2 file 1762 * but it can be smaller than initially requested. In this case @bytes 1763 * is updated with the actual size. 1764 * 1765 * If any clusters or subclusters were allocated then @m contains a 1766 * list with the information of all the affected regions. Note that 1767 * this can happen regardless of whether this function succeeds or 1768 * not. The caller is responsible for updating the L2 metadata of the 1769 * allocated clusters (on success) or freeing them (on failure), and 1770 * for clearing the contents of @m afterwards in both cases. 1771 * 1772 * If the request conflicts with another write request in flight, the coroutine 1773 * is queued and will be reentered when the dependency has completed. 
1774 * 1775 * Return 0 on success and -errno in error cases 1776 */ 1777 int coroutine_fn qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset, 1778 unsigned int *bytes, 1779 uint64_t *host_offset, 1780 QCowL2Meta **m) 1781 { 1782 BDRVQcow2State *s = bs->opaque; 1783 uint64_t start, remaining; 1784 uint64_t cluster_offset; 1785 uint64_t cur_bytes; 1786 int ret; 1787 1788 trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *bytes); 1789 1790 again: 1791 start = offset; 1792 remaining = *bytes; 1793 cluster_offset = INV_OFFSET; 1794 *host_offset = INV_OFFSET; 1795 cur_bytes = 0; 1796 *m = NULL; 1797 1798 while (true) { 1799 1800 if (*host_offset == INV_OFFSET && cluster_offset != INV_OFFSET) { 1801 *host_offset = cluster_offset; 1802 } 1803 1804 assert(remaining >= cur_bytes); 1805 1806 start += cur_bytes; 1807 remaining -= cur_bytes; 1808 1809 if (cluster_offset != INV_OFFSET) { 1810 cluster_offset += cur_bytes; 1811 } 1812 1813 if (remaining == 0) { 1814 break; 1815 } 1816 1817 cur_bytes = remaining; 1818 1819 /* 1820 * Now start gathering as many contiguous clusters as possible: 1821 * 1822 * 1. Check for overlaps with in-flight allocations 1823 * 1824 * a) Overlap not in the first cluster -> shorten this request and 1825 * let the caller handle the rest in its next loop iteration. 1826 * 1827 * b) Real overlaps of two requests. Yield and restart the search 1828 * for contiguous clusters (the situation could have changed 1829 * while we were sleeping) 1830 * 1831 * c) TODO: Request starts in the same cluster as the in-flight 1832 * allocation ends. Shorten the COW of the in-fight allocation, 1833 * set cluster_offset to write to the same cluster and set up 1834 * the right synchronisation between the in-flight request and 1835 * the new one. 1836 */ 1837 ret = handle_dependencies(bs, start, &cur_bytes, m); 1838 if (ret == -EAGAIN) { 1839 /* Currently handle_dependencies() doesn't yield if we already had 1840 * an allocation. 
If it did, we would have to clean up the L2Meta 1841 * structs before starting over. */ 1842 assert(*m == NULL); 1843 goto again; 1844 } else if (ret < 0) { 1845 return ret; 1846 } else if (cur_bytes == 0) { 1847 break; 1848 } else { 1849 /* handle_dependencies() may have decreased cur_bytes (shortened 1850 * the allocations below) so that the next dependency is processed 1851 * correctly during the next loop iteration. */ 1852 } 1853 1854 /* 1855 * 2. Count contiguous COPIED clusters. 1856 */ 1857 ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m); 1858 if (ret < 0) { 1859 return ret; 1860 } else if (ret) { 1861 continue; 1862 } else if (cur_bytes == 0) { 1863 break; 1864 } 1865 1866 /* 1867 * 3. If the request still hasn't completed, allocate new clusters, 1868 * considering any cluster_offset of steps 1c or 2. 1869 */ 1870 ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m); 1871 if (ret < 0) { 1872 return ret; 1873 } else if (ret) { 1874 continue; 1875 } else { 1876 assert(cur_bytes == 0); 1877 break; 1878 } 1879 } 1880 1881 *bytes -= remaining; 1882 assert(*bytes > 0); 1883 assert(*host_offset != INV_OFFSET); 1884 assert(offset_into_cluster(s, *host_offset) == 1885 offset_into_cluster(s, offset)); 1886 1887 return 0; 1888 } 1889 1890 /* 1891 * This discards as many clusters of nb_clusters as possible at once (i.e. 1892 * all clusters in the same L2 slice) and returns the number of discarded 1893 * clusters. 
1894 */ 1895 static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, 1896 uint64_t nb_clusters, 1897 enum qcow2_discard_type type, bool full_discard) 1898 { 1899 BDRVQcow2State *s = bs->opaque; 1900 uint64_t *l2_slice; 1901 int l2_index; 1902 int ret; 1903 int i; 1904 1905 ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); 1906 if (ret < 0) { 1907 return ret; 1908 } 1909 1910 /* Limit nb_clusters to one L2 slice */ 1911 nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); 1912 assert(nb_clusters <= INT_MAX); 1913 1914 for (i = 0; i < nb_clusters; i++) { 1915 uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i); 1916 uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); 1917 uint64_t new_l2_entry = old_l2_entry; 1918 uint64_t new_l2_bitmap = old_l2_bitmap; 1919 QCow2ClusterType cluster_type = 1920 qcow2_get_cluster_type(bs, old_l2_entry); 1921 1922 /* 1923 * If full_discard is true, the cluster should not read back as zeroes, 1924 * but rather fall through to the backing file. 1925 * 1926 * If full_discard is false, make sure that a discarded area reads back 1927 * as zeroes for v3 images (we cannot do it for v2 without actually 1928 * writing a zero-filled buffer). We can skip the operation if the 1929 * cluster is already marked as zero, or if it's unallocated and we 1930 * don't have a backing file. 1931 * 1932 * TODO We might want to use bdrv_block_status(bs) here, but we're 1933 * holding s->lock, so that doesn't work today. 1934 */ 1935 if (full_discard) { 1936 new_l2_entry = new_l2_bitmap = 0; 1937 } else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) { 1938 if (has_subclusters(s)) { 1939 new_l2_entry = 0; 1940 new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES; 1941 } else { 1942 new_l2_entry = s->qcow_version >= 3 ? 
QCOW_OFLAG_ZERO : 0; 1943 } 1944 } 1945 1946 if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) { 1947 continue; 1948 } 1949 1950 /* First remove L2 entries */ 1951 qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 1952 set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry); 1953 if (has_subclusters(s)) { 1954 set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap); 1955 } 1956 /* Then decrease the refcount */ 1957 qcow2_free_any_cluster(bs, old_l2_entry, type); 1958 } 1959 1960 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 1961 1962 return nb_clusters; 1963 } 1964 1965 int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, 1966 uint64_t bytes, enum qcow2_discard_type type, 1967 bool full_discard) 1968 { 1969 BDRVQcow2State *s = bs->opaque; 1970 uint64_t end_offset = offset + bytes; 1971 uint64_t nb_clusters; 1972 int64_t cleared; 1973 int ret; 1974 1975 /* Caller must pass aligned values, except at image end */ 1976 assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); 1977 assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || 1978 end_offset == bs->total_sectors << BDRV_SECTOR_BITS); 1979 1980 nb_clusters = size_to_clusters(s, bytes); 1981 1982 s->cache_discards = true; 1983 1984 /* Each L2 slice is handled by its own loop iteration */ 1985 while (nb_clusters > 0) { 1986 cleared = discard_in_l2_slice(bs, offset, nb_clusters, type, 1987 full_discard); 1988 if (cleared < 0) { 1989 ret = cleared; 1990 goto fail; 1991 } 1992 1993 nb_clusters -= cleared; 1994 offset += (cleared * s->cluster_size); 1995 } 1996 1997 ret = 0; 1998 fail: 1999 s->cache_discards = false; 2000 qcow2_process_discards(bs, ret); 2001 2002 return ret; 2003 } 2004 2005 /* 2006 * This zeroes as many clusters of nb_clusters as possible at once (i.e. 2007 * all clusters in the same L2 slice) and returns the number of zeroed 2008 * clusters. 
2009 */ 2010 static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset, 2011 uint64_t nb_clusters, int flags) 2012 { 2013 BDRVQcow2State *s = bs->opaque; 2014 uint64_t *l2_slice; 2015 int l2_index; 2016 int ret; 2017 int i; 2018 2019 ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); 2020 if (ret < 0) { 2021 return ret; 2022 } 2023 2024 /* Limit nb_clusters to one L2 slice */ 2025 nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); 2026 assert(nb_clusters <= INT_MAX); 2027 2028 for (i = 0; i < nb_clusters; i++) { 2029 uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i); 2030 uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); 2031 QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry); 2032 bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) || 2033 ((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type)); 2034 uint64_t new_l2_entry = unmap ? 0 : old_l2_entry; 2035 uint64_t new_l2_bitmap = old_l2_bitmap; 2036 2037 if (has_subclusters(s)) { 2038 new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES; 2039 } else { 2040 new_l2_entry |= QCOW_OFLAG_ZERO; 2041 } 2042 2043 if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) { 2044 continue; 2045 } 2046 2047 /* First update L2 entries */ 2048 qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 2049 set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry); 2050 if (has_subclusters(s)) { 2051 set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap); 2052 } 2053 2054 /* Then decrease the refcount */ 2055 if (unmap) { 2056 qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST); 2057 } 2058 } 2059 2060 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 2061 2062 return nb_clusters; 2063 } 2064 2065 static int zero_l2_subclusters(BlockDriverState *bs, uint64_t offset, 2066 unsigned nb_subclusters) 2067 { 2068 BDRVQcow2State *s = bs->opaque; 2069 uint64_t *l2_slice; 2070 uint64_t old_l2_bitmap, l2_bitmap; 2071 int l2_index, ret, sc = 
offset_to_sc_index(s, offset); 2072 2073 /* For full clusters use zero_in_l2_slice() instead */ 2074 assert(nb_subclusters > 0 && nb_subclusters < s->subclusters_per_cluster); 2075 assert(sc + nb_subclusters <= s->subclusters_per_cluster); 2076 assert(offset_into_subcluster(s, offset) == 0); 2077 2078 ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); 2079 if (ret < 0) { 2080 return ret; 2081 } 2082 2083 switch (qcow2_get_cluster_type(bs, get_l2_entry(s, l2_slice, l2_index))) { 2084 case QCOW2_CLUSTER_COMPRESSED: 2085 ret = -ENOTSUP; /* We cannot partially zeroize compressed clusters */ 2086 goto out; 2087 case QCOW2_CLUSTER_NORMAL: 2088 case QCOW2_CLUSTER_UNALLOCATED: 2089 break; 2090 default: 2091 g_assert_not_reached(); 2092 } 2093 2094 old_l2_bitmap = l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); 2095 2096 l2_bitmap |= QCOW_OFLAG_SUB_ZERO_RANGE(sc, sc + nb_subclusters); 2097 l2_bitmap &= ~QCOW_OFLAG_SUB_ALLOC_RANGE(sc, sc + nb_subclusters); 2098 2099 if (old_l2_bitmap != l2_bitmap) { 2100 set_l2_bitmap(s, l2_slice, l2_index, l2_bitmap); 2101 qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 2102 } 2103 2104 ret = 0; 2105 out: 2106 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 2107 2108 return ret; 2109 } 2110 2111 int coroutine_fn qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, 2112 uint64_t bytes, int flags) 2113 { 2114 BDRVQcow2State *s = bs->opaque; 2115 uint64_t end_offset = offset + bytes; 2116 uint64_t nb_clusters; 2117 unsigned head, tail; 2118 int64_t cleared; 2119 int ret; 2120 2121 /* If we have to stay in sync with an external data file, zero out 2122 * s->data_file first. 
*/ 2123 if (data_file_is_raw(bs)) { 2124 assert(has_data_file(bs)); 2125 ret = bdrv_co_pwrite_zeroes(s->data_file, offset, bytes, flags); 2126 if (ret < 0) { 2127 return ret; 2128 } 2129 } 2130 2131 /* Caller must pass aligned values, except at image end */ 2132 assert(offset_into_subcluster(s, offset) == 0); 2133 assert(offset_into_subcluster(s, end_offset) == 0 || 2134 end_offset >= bs->total_sectors << BDRV_SECTOR_BITS); 2135 2136 /* 2137 * The zero flag is only supported by version 3 and newer. However, if we 2138 * have no backing file, we can resort to discard in version 2. 2139 */ 2140 if (s->qcow_version < 3) { 2141 if (!bs->backing) { 2142 return qcow2_cluster_discard(bs, offset, bytes, 2143 QCOW2_DISCARD_REQUEST, false); 2144 } 2145 return -ENOTSUP; 2146 } 2147 2148 head = MIN(end_offset, ROUND_UP(offset, s->cluster_size)) - offset; 2149 offset += head; 2150 2151 tail = (end_offset >= bs->total_sectors << BDRV_SECTOR_BITS) ? 0 : 2152 end_offset - MAX(offset, start_of_cluster(s, end_offset)); 2153 end_offset -= tail; 2154 2155 s->cache_discards = true; 2156 2157 if (head) { 2158 ret = zero_l2_subclusters(bs, offset - head, 2159 size_to_subclusters(s, head)); 2160 if (ret < 0) { 2161 goto fail; 2162 } 2163 } 2164 2165 /* Each L2 slice is handled by its own loop iteration */ 2166 nb_clusters = size_to_clusters(s, end_offset - offset); 2167 2168 while (nb_clusters > 0) { 2169 cleared = zero_in_l2_slice(bs, offset, nb_clusters, flags); 2170 if (cleared < 0) { 2171 ret = cleared; 2172 goto fail; 2173 } 2174 2175 nb_clusters -= cleared; 2176 offset += (cleared * s->cluster_size); 2177 } 2178 2179 if (tail) { 2180 ret = zero_l2_subclusters(bs, end_offset, size_to_subclusters(s, tail)); 2181 if (ret < 0) { 2182 goto fail; 2183 } 2184 } 2185 2186 ret = 0; 2187 fail: 2188 s->cache_discards = false; 2189 qcow2_process_discards(bs, ret); 2190 2191 return ret; 2192 } 2193 2194 /* 2195 * Expands all zero clusters in a specific L1 table (or deallocates them, for 2196 * 
non-backed non-pre-allocated zero clusters). 2197 * 2198 * l1_entries and *visited_l1_entries are used to keep track of progress for 2199 * status_cb(). l1_entries contains the total number of L1 entries and 2200 * *visited_l1_entries counts all visited L1 entries. 2201 */ 2202 static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, 2203 int l1_size, int64_t *visited_l1_entries, 2204 int64_t l1_entries, 2205 BlockDriverAmendStatusCB *status_cb, 2206 void *cb_opaque) 2207 { 2208 BDRVQcow2State *s = bs->opaque; 2209 bool is_active_l1 = (l1_table == s->l1_table); 2210 uint64_t *l2_slice = NULL; 2211 unsigned slice, slice_size2, n_slices; 2212 int ret; 2213 int i, j; 2214 2215 /* qcow2_downgrade() is not allowed in images with subclusters */ 2216 assert(!has_subclusters(s)); 2217 2218 slice_size2 = s->l2_slice_size * l2_entry_size(s); 2219 n_slices = s->cluster_size / slice_size2; 2220 2221 if (!is_active_l1) { 2222 /* inactive L2 tables require a buffer to be stored in when loading 2223 * them from disk */ 2224 l2_slice = qemu_try_blockalign(bs->file->bs, slice_size2); 2225 if (l2_slice == NULL) { 2226 return -ENOMEM; 2227 } 2228 } 2229 2230 for (i = 0; i < l1_size; i++) { 2231 uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK; 2232 uint64_t l2_refcount; 2233 2234 if (!l2_offset) { 2235 /* unallocated */ 2236 (*visited_l1_entries)++; 2237 if (status_cb) { 2238 status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); 2239 } 2240 continue; 2241 } 2242 2243 if (offset_into_cluster(s, l2_offset)) { 2244 qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" 2245 PRIx64 " unaligned (L1 index: %#x)", 2246 l2_offset, i); 2247 ret = -EIO; 2248 goto fail; 2249 } 2250 2251 ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, 2252 &l2_refcount); 2253 if (ret < 0) { 2254 goto fail; 2255 } 2256 2257 for (slice = 0; slice < n_slices; slice++) { 2258 uint64_t slice_offset = l2_offset + slice * slice_size2; 2259 bool l2_dirty = false; 2260 if 
(is_active_l1) { 2261 /* get active L2 tables from cache */ 2262 ret = qcow2_cache_get(bs, s->l2_table_cache, slice_offset, 2263 (void **)&l2_slice); 2264 } else { 2265 /* load inactive L2 tables from disk */ 2266 ret = bdrv_pread(bs->file, slice_offset, slice_size2, 2267 l2_slice, 0); 2268 } 2269 if (ret < 0) { 2270 goto fail; 2271 } 2272 2273 for (j = 0; j < s->l2_slice_size; j++) { 2274 uint64_t l2_entry = get_l2_entry(s, l2_slice, j); 2275 int64_t offset = l2_entry & L2E_OFFSET_MASK; 2276 QCow2ClusterType cluster_type = 2277 qcow2_get_cluster_type(bs, l2_entry); 2278 2279 if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN && 2280 cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) { 2281 continue; 2282 } 2283 2284 if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { 2285 if (!bs->backing) { 2286 /* 2287 * not backed; therefore we can simply deallocate the 2288 * cluster. No need to call set_l2_bitmap(), this 2289 * function doesn't support images with subclusters. 2290 */ 2291 set_l2_entry(s, l2_slice, j, 0); 2292 l2_dirty = true; 2293 continue; 2294 } 2295 2296 offset = qcow2_alloc_clusters(bs, s->cluster_size); 2297 if (offset < 0) { 2298 ret = offset; 2299 goto fail; 2300 } 2301 2302 /* The offset must fit in the offset field */ 2303 assert((offset & L2E_OFFSET_MASK) == offset); 2304 2305 if (l2_refcount > 1) { 2306 /* For shared L2 tables, set the refcount accordingly 2307 * (it is already 1 and needs to be l2_refcount) */ 2308 ret = qcow2_update_cluster_refcount( 2309 bs, offset >> s->cluster_bits, 2310 refcount_diff(1, l2_refcount), false, 2311 QCOW2_DISCARD_OTHER); 2312 if (ret < 0) { 2313 qcow2_free_clusters(bs, offset, s->cluster_size, 2314 QCOW2_DISCARD_OTHER); 2315 goto fail; 2316 } 2317 } 2318 } 2319 2320 if (offset_into_cluster(s, offset)) { 2321 int l2_index = slice * s->l2_slice_size + j; 2322 qcow2_signal_corruption( 2323 bs, true, -1, -1, 2324 "Cluster allocation offset " 2325 "%#" PRIx64 " unaligned (L2 offset: %#" 2326 PRIx64 ", L2 index: %#x)", offset, 2327 
l2_offset, l2_index); 2328 if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { 2329 qcow2_free_clusters(bs, offset, s->cluster_size, 2330 QCOW2_DISCARD_ALWAYS); 2331 } 2332 ret = -EIO; 2333 goto fail; 2334 } 2335 2336 ret = qcow2_pre_write_overlap_check(bs, 0, offset, 2337 s->cluster_size, true); 2338 if (ret < 0) { 2339 if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { 2340 qcow2_free_clusters(bs, offset, s->cluster_size, 2341 QCOW2_DISCARD_ALWAYS); 2342 } 2343 goto fail; 2344 } 2345 2346 ret = bdrv_pwrite_zeroes(s->data_file, offset, 2347 s->cluster_size, 0); 2348 if (ret < 0) { 2349 if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) { 2350 qcow2_free_clusters(bs, offset, s->cluster_size, 2351 QCOW2_DISCARD_ALWAYS); 2352 } 2353 goto fail; 2354 } 2355 2356 if (l2_refcount == 1) { 2357 set_l2_entry(s, l2_slice, j, offset | QCOW_OFLAG_COPIED); 2358 } else { 2359 set_l2_entry(s, l2_slice, j, offset); 2360 } 2361 /* 2362 * No need to call set_l2_bitmap() after set_l2_entry() because 2363 * this function doesn't support images with subclusters. 
2364 */ 2365 l2_dirty = true; 2366 } 2367 2368 if (is_active_l1) { 2369 if (l2_dirty) { 2370 qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 2371 qcow2_cache_depends_on_flush(s->l2_table_cache); 2372 } 2373 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 2374 } else { 2375 if (l2_dirty) { 2376 ret = qcow2_pre_write_overlap_check( 2377 bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, 2378 slice_offset, slice_size2, false); 2379 if (ret < 0) { 2380 goto fail; 2381 } 2382 2383 ret = bdrv_pwrite(bs->file, slice_offset, slice_size2, 2384 l2_slice, 0); 2385 if (ret < 0) { 2386 goto fail; 2387 } 2388 } 2389 } 2390 } 2391 2392 (*visited_l1_entries)++; 2393 if (status_cb) { 2394 status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); 2395 } 2396 } 2397 2398 ret = 0; 2399 2400 fail: 2401 if (l2_slice) { 2402 if (!is_active_l1) { 2403 qemu_vfree(l2_slice); 2404 } else { 2405 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 2406 } 2407 } 2408 return ret; 2409 } 2410 2411 /* 2412 * For backed images, expands all zero clusters on the image. For non-backed 2413 * images, deallocates all non-pre-allocated zero clusters (and claims the 2414 * allocation for pre-allocated ones). This is important for downgrading to a 2415 * qcow2 version which doesn't yet support metadata zero clusters. 
2416 */ 2417 int qcow2_expand_zero_clusters(BlockDriverState *bs, 2418 BlockDriverAmendStatusCB *status_cb, 2419 void *cb_opaque) 2420 { 2421 BDRVQcow2State *s = bs->opaque; 2422 uint64_t *l1_table = NULL; 2423 int64_t l1_entries = 0, visited_l1_entries = 0; 2424 int ret; 2425 int i, j; 2426 2427 if (status_cb) { 2428 l1_entries = s->l1_size; 2429 for (i = 0; i < s->nb_snapshots; i++) { 2430 l1_entries += s->snapshots[i].l1_size; 2431 } 2432 } 2433 2434 ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size, 2435 &visited_l1_entries, l1_entries, 2436 status_cb, cb_opaque); 2437 if (ret < 0) { 2438 goto fail; 2439 } 2440 2441 /* Inactive L1 tables may point to active L2 tables - therefore it is 2442 * necessary to flush the L2 table cache before trying to access the L2 2443 * tables pointed to by inactive L1 entries (else we might try to expand 2444 * zero clusters that have already been expanded); furthermore, it is also 2445 * necessary to empty the L2 table cache, since it may contain tables which 2446 * are now going to be modified directly on disk, bypassing the cache. 2447 * qcow2_cache_empty() does both for us. 
*/ 2448 ret = qcow2_cache_empty(bs, s->l2_table_cache); 2449 if (ret < 0) { 2450 goto fail; 2451 } 2452 2453 for (i = 0; i < s->nb_snapshots; i++) { 2454 int l1_size2; 2455 uint64_t *new_l1_table; 2456 Error *local_err = NULL; 2457 2458 ret = qcow2_validate_table(bs, s->snapshots[i].l1_table_offset, 2459 s->snapshots[i].l1_size, L1E_SIZE, 2460 QCOW_MAX_L1_SIZE, "Snapshot L1 table", 2461 &local_err); 2462 if (ret < 0) { 2463 error_report_err(local_err); 2464 goto fail; 2465 } 2466 2467 l1_size2 = s->snapshots[i].l1_size * L1E_SIZE; 2468 new_l1_table = g_try_realloc(l1_table, l1_size2); 2469 2470 if (!new_l1_table) { 2471 ret = -ENOMEM; 2472 goto fail; 2473 } 2474 2475 l1_table = new_l1_table; 2476 2477 ret = bdrv_pread(bs->file, s->snapshots[i].l1_table_offset, l1_size2, 2478 l1_table, 0); 2479 if (ret < 0) { 2480 goto fail; 2481 } 2482 2483 for (j = 0; j < s->snapshots[i].l1_size; j++) { 2484 be64_to_cpus(&l1_table[j]); 2485 } 2486 2487 ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size, 2488 &visited_l1_entries, l1_entries, 2489 status_cb, cb_opaque); 2490 if (ret < 0) { 2491 goto fail; 2492 } 2493 } 2494 2495 ret = 0; 2496 2497 fail: 2498 g_free(l1_table); 2499 return ret; 2500 } 2501 2502 void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry, 2503 uint64_t *coffset, int *csize) 2504 { 2505 BDRVQcow2State *s = bs->opaque; 2506 int nb_csectors; 2507 2508 assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED); 2509 2510 *coffset = l2_entry & s->cluster_offset_mask; 2511 2512 nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1; 2513 *csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE - 2514 (*coffset & (QCOW2_COMPRESSED_SECTOR_SIZE - 1)); 2515 } 2516