1 /* 2 * Block driver for the QCOW version 2 format 3 * 4 * Copyright (c) 2004-2006 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 
23 */ 24 25 #include "qemu/osdep.h" 26 #include <zlib.h> 27 28 #include "qapi/error.h" 29 #include "qcow2.h" 30 #include "qemu/bswap.h" 31 #include "qemu/memalign.h" 32 #include "trace.h" 33 34 int coroutine_fn qcow2_shrink_l1_table(BlockDriverState *bs, 35 uint64_t exact_size) 36 { 37 BDRVQcow2State *s = bs->opaque; 38 int new_l1_size, i, ret; 39 40 if (exact_size >= s->l1_size) { 41 return 0; 42 } 43 44 new_l1_size = exact_size; 45 46 #ifdef DEBUG_ALLOC2 47 fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size); 48 #endif 49 50 BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE); 51 ret = bdrv_co_pwrite_zeroes(bs->file, 52 s->l1_table_offset + new_l1_size * L1E_SIZE, 53 (s->l1_size - new_l1_size) * L1E_SIZE, 0); 54 if (ret < 0) { 55 goto fail; 56 } 57 58 ret = bdrv_co_flush(bs->file->bs); 59 if (ret < 0) { 60 goto fail; 61 } 62 63 BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS); 64 for (i = s->l1_size - 1; i > new_l1_size - 1; i--) { 65 if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) { 66 continue; 67 } 68 qcow2_free_clusters(bs, s->l1_table[i] & L1E_OFFSET_MASK, 69 s->cluster_size, QCOW2_DISCARD_ALWAYS); 70 s->l1_table[i] = 0; 71 } 72 return 0; 73 74 fail: 75 /* 76 * If the write in the l1_table failed the image may contain a partially 77 * overwritten l1_table. In this case it would be better to clear the 78 * l1_table in memory to avoid possible image corruption. 
79 */ 80 memset(s->l1_table + new_l1_size, 0, 81 (s->l1_size - new_l1_size) * L1E_SIZE); 82 return ret; 83 } 84 85 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, 86 bool exact_size) 87 { 88 BDRVQcow2State *s = bs->opaque; 89 int new_l1_size2, ret, i; 90 uint64_t *new_l1_table; 91 int64_t old_l1_table_offset, old_l1_size; 92 int64_t new_l1_table_offset, new_l1_size; 93 uint8_t data[12]; 94 95 if (min_size <= s->l1_size) 96 return 0; 97 98 /* Do a sanity check on min_size before trying to calculate new_l1_size 99 * (this prevents overflows during the while loop for the calculation of 100 * new_l1_size) */ 101 if (min_size > INT_MAX / L1E_SIZE) { 102 return -EFBIG; 103 } 104 105 if (exact_size) { 106 new_l1_size = min_size; 107 } else { 108 /* Bump size up to reduce the number of times we have to grow */ 109 new_l1_size = s->l1_size; 110 if (new_l1_size == 0) { 111 new_l1_size = 1; 112 } 113 while (min_size > new_l1_size) { 114 new_l1_size = DIV_ROUND_UP(new_l1_size * 3, 2); 115 } 116 } 117 118 QEMU_BUILD_BUG_ON(QCOW_MAX_L1_SIZE > INT_MAX); 119 if (new_l1_size > QCOW_MAX_L1_SIZE / L1E_SIZE) { 120 return -EFBIG; 121 } 122 123 #ifdef DEBUG_ALLOC2 124 fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n", 125 s->l1_size, new_l1_size); 126 #endif 127 128 new_l1_size2 = L1E_SIZE * new_l1_size; 129 new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_size2); 130 if (new_l1_table == NULL) { 131 return -ENOMEM; 132 } 133 memset(new_l1_table, 0, new_l1_size2); 134 135 if (s->l1_size) { 136 memcpy(new_l1_table, s->l1_table, s->l1_size * L1E_SIZE); 137 } 138 139 /* write new table (align to cluster) */ 140 BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE); 141 new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2); 142 if (new_l1_table_offset < 0) { 143 qemu_vfree(new_l1_table); 144 return new_l1_table_offset; 145 } 146 147 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 148 if (ret < 0) { 149 goto fail; 150 } 151 152 /* the L1 position has 
not yet been updated, so these clusters must 153 * indeed be completely free */ 154 ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset, 155 new_l1_size2, false); 156 if (ret < 0) { 157 goto fail; 158 } 159 160 BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE); 161 for(i = 0; i < s->l1_size; i++) 162 new_l1_table[i] = cpu_to_be64(new_l1_table[i]); 163 ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_size2, 164 new_l1_table, 0); 165 if (ret < 0) 166 goto fail; 167 for(i = 0; i < s->l1_size; i++) 168 new_l1_table[i] = be64_to_cpu(new_l1_table[i]); 169 170 /* set new table */ 171 BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE); 172 stl_be_p(data, new_l1_size); 173 stq_be_p(data + 4, new_l1_table_offset); 174 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), 175 sizeof(data), data, 0); 176 if (ret < 0) { 177 goto fail; 178 } 179 qemu_vfree(s->l1_table); 180 old_l1_table_offset = s->l1_table_offset; 181 s->l1_table_offset = new_l1_table_offset; 182 s->l1_table = new_l1_table; 183 old_l1_size = s->l1_size; 184 s->l1_size = new_l1_size; 185 qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * L1E_SIZE, 186 QCOW2_DISCARD_OTHER); 187 return 0; 188 fail: 189 qemu_vfree(new_l1_table); 190 qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2, 191 QCOW2_DISCARD_OTHER); 192 return ret; 193 } 194 195 /* 196 * l2_load 197 * 198 * @bs: The BlockDriverState 199 * @offset: A guest offset, used to calculate what slice of the L2 200 * table to load. 201 * @l2_offset: Offset to the L2 table in the image file. 202 * @l2_slice: Location to store the pointer to the L2 slice. 203 * 204 * Loads a L2 slice into memory (L2 slices are the parts of L2 tables 205 * that are loaded by the qcow2 cache). If the slice is in the cache, 206 * the cache is used; otherwise the L2 slice is loaded from the image 207 * file. 
208 */ 209 static int l2_load(BlockDriverState *bs, uint64_t offset, 210 uint64_t l2_offset, uint64_t **l2_slice) 211 { 212 BDRVQcow2State *s = bs->opaque; 213 int start_of_slice = l2_entry_size(s) * 214 (offset_to_l2_index(s, offset) - offset_to_l2_slice_index(s, offset)); 215 216 return qcow2_cache_get(bs, s->l2_table_cache, l2_offset + start_of_slice, 217 (void **)l2_slice); 218 } 219 220 /* 221 * Writes an L1 entry to disk (note that depending on the alignment 222 * requirements this function may write more that just one entry in 223 * order to prevent bdrv_pwrite from performing a read-modify-write) 224 */ 225 int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index) 226 { 227 BDRVQcow2State *s = bs->opaque; 228 int l1_start_index; 229 int i, ret; 230 int bufsize = MAX(L1E_SIZE, 231 MIN(bs->file->bs->bl.request_alignment, s->cluster_size)); 232 int nentries = bufsize / L1E_SIZE; 233 g_autofree uint64_t *buf = g_try_new0(uint64_t, nentries); 234 235 if (buf == NULL) { 236 return -ENOMEM; 237 } 238 239 l1_start_index = QEMU_ALIGN_DOWN(l1_index, nentries); 240 for (i = 0; i < MIN(nentries, s->l1_size - l1_start_index); i++) { 241 buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]); 242 } 243 244 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1, 245 s->l1_table_offset + L1E_SIZE * l1_start_index, bufsize, false); 246 if (ret < 0) { 247 return ret; 248 } 249 250 BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); 251 ret = bdrv_pwrite_sync(bs->file, 252 s->l1_table_offset + L1E_SIZE * l1_start_index, 253 bufsize, buf, 0); 254 if (ret < 0) { 255 return ret; 256 } 257 258 return 0; 259 } 260 261 /* 262 * l2_allocate 263 * 264 * Allocate a new l2 entry in the file. If l1_index points to an already 265 * used entry in the L2 table (i.e. we are doing a copy on write for the L2 266 * table) copy the contents of the old L2 table into the newly allocated one. 267 * Otherwise the new table is initialized with zeros. 
268 * 269 */ 270 271 static int l2_allocate(BlockDriverState *bs, int l1_index) 272 { 273 BDRVQcow2State *s = bs->opaque; 274 uint64_t old_l2_offset; 275 uint64_t *l2_slice = NULL; 276 unsigned slice, slice_size2, n_slices; 277 int64_t l2_offset; 278 int ret; 279 280 old_l2_offset = s->l1_table[l1_index]; 281 282 trace_qcow2_l2_allocate(bs, l1_index); 283 284 /* allocate a new l2 entry */ 285 286 l2_offset = qcow2_alloc_clusters(bs, s->l2_size * l2_entry_size(s)); 287 if (l2_offset < 0) { 288 ret = l2_offset; 289 goto fail; 290 } 291 292 /* The offset must fit in the offset field of the L1 table entry */ 293 assert((l2_offset & L1E_OFFSET_MASK) == l2_offset); 294 295 /* If we're allocating the table at offset 0 then something is wrong */ 296 if (l2_offset == 0) { 297 qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid " 298 "allocation of L2 table at offset 0"); 299 ret = -EIO; 300 goto fail; 301 } 302 303 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 304 if (ret < 0) { 305 goto fail; 306 } 307 308 /* allocate a new entry in the l2 cache */ 309 310 slice_size2 = s->l2_slice_size * l2_entry_size(s); 311 n_slices = s->cluster_size / slice_size2; 312 313 trace_qcow2_l2_allocate_get_empty(bs, l1_index); 314 for (slice = 0; slice < n_slices; slice++) { 315 ret = qcow2_cache_get_empty(bs, s->l2_table_cache, 316 l2_offset + slice * slice_size2, 317 (void **) &l2_slice); 318 if (ret < 0) { 319 goto fail; 320 } 321 322 if ((old_l2_offset & L1E_OFFSET_MASK) == 0) { 323 /* if there was no old l2 table, clear the new slice */ 324 memset(l2_slice, 0, slice_size2); 325 } else { 326 uint64_t *old_slice; 327 uint64_t old_l2_slice_offset = 328 (old_l2_offset & L1E_OFFSET_MASK) + slice * slice_size2; 329 330 /* if there was an old l2 table, read a slice from the disk */ 331 BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); 332 ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_slice_offset, 333 (void **) &old_slice); 334 if (ret < 0) { 335 goto fail; 336 } 337 338 
memcpy(l2_slice, old_slice, slice_size2); 339 340 qcow2_cache_put(s->l2_table_cache, (void **) &old_slice); 341 } 342 343 /* write the l2 slice to the file */ 344 BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); 345 346 trace_qcow2_l2_allocate_write_l2(bs, l1_index); 347 qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 348 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 349 } 350 351 ret = qcow2_cache_flush(bs, s->l2_table_cache); 352 if (ret < 0) { 353 goto fail; 354 } 355 356 /* update the L1 entry */ 357 trace_qcow2_l2_allocate_write_l1(bs, l1_index); 358 s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; 359 ret = qcow2_write_l1_entry(bs, l1_index); 360 if (ret < 0) { 361 goto fail; 362 } 363 364 trace_qcow2_l2_allocate_done(bs, l1_index, 0); 365 return 0; 366 367 fail: 368 trace_qcow2_l2_allocate_done(bs, l1_index, ret); 369 if (l2_slice != NULL) { 370 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 371 } 372 s->l1_table[l1_index] = old_l2_offset; 373 if (l2_offset > 0) { 374 qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s), 375 QCOW2_DISCARD_ALWAYS); 376 } 377 return ret; 378 } 379 380 /* 381 * For a given L2 entry, count the number of contiguous subclusters of 382 * the same type starting from @sc_from. Compressed clusters are 383 * treated as if they were divided into subclusters of size 384 * s->subcluster_size. 385 * 386 * Return the number of contiguous subclusters and set @type to the 387 * subcluster type. 388 * 389 * If the L2 entry is invalid return -errno and set @type to 390 * QCOW2_SUBCLUSTER_INVALID. 
391 */ 392 static int qcow2_get_subcluster_range_type(BlockDriverState *bs, 393 uint64_t l2_entry, 394 uint64_t l2_bitmap, 395 unsigned sc_from, 396 QCow2SubclusterType *type) 397 { 398 BDRVQcow2State *s = bs->opaque; 399 uint32_t val; 400 401 *type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_from); 402 403 if (*type == QCOW2_SUBCLUSTER_INVALID) { 404 return -EINVAL; 405 } else if (!has_subclusters(s) || *type == QCOW2_SUBCLUSTER_COMPRESSED) { 406 return s->subclusters_per_cluster - sc_from; 407 } 408 409 switch (*type) { 410 case QCOW2_SUBCLUSTER_NORMAL: 411 val = l2_bitmap | QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from); 412 return cto32(val) - sc_from; 413 414 case QCOW2_SUBCLUSTER_ZERO_PLAIN: 415 case QCOW2_SUBCLUSTER_ZERO_ALLOC: 416 val = (l2_bitmap | QCOW_OFLAG_SUB_ZERO_RANGE(0, sc_from)) >> 32; 417 return cto32(val) - sc_from; 418 419 case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: 420 case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: 421 val = ((l2_bitmap >> 32) | l2_bitmap) 422 & ~QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from); 423 return ctz32(val) - sc_from; 424 425 default: 426 g_assert_not_reached(); 427 } 428 } 429 430 /* 431 * Return the number of contiguous subclusters of the exact same type 432 * in a given L2 slice, starting from cluster @l2_index, subcluster 433 * @sc_index. Allocated subclusters are required to be contiguous in 434 * the image file. 435 * At most @nb_clusters are checked (note that this means clusters, 436 * not subclusters). 437 * Compressed clusters are always processed one by one but for the 438 * purpose of this count they are treated as if they were divided into 439 * subclusters of size s->subcluster_size. 440 * On failure return -errno and update @l2_index to point to the 441 * invalid entry. 
442 */ 443 static int count_contiguous_subclusters(BlockDriverState *bs, int nb_clusters, 444 unsigned sc_index, uint64_t *l2_slice, 445 unsigned *l2_index) 446 { 447 BDRVQcow2State *s = bs->opaque; 448 int i, count = 0; 449 bool check_offset = false; 450 uint64_t expected_offset = 0; 451 QCow2SubclusterType expected_type = QCOW2_SUBCLUSTER_NORMAL, type; 452 453 assert(*l2_index + nb_clusters <= s->l2_slice_size); 454 455 for (i = 0; i < nb_clusters; i++) { 456 unsigned first_sc = (i == 0) ? sc_index : 0; 457 uint64_t l2_entry = get_l2_entry(s, l2_slice, *l2_index + i); 458 uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, *l2_index + i); 459 int ret = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap, 460 first_sc, &type); 461 if (ret < 0) { 462 *l2_index += i; /* Point to the invalid entry */ 463 return -EIO; 464 } 465 if (i == 0) { 466 if (type == QCOW2_SUBCLUSTER_COMPRESSED) { 467 /* Compressed clusters are always processed one by one */ 468 return ret; 469 } 470 expected_type = type; 471 expected_offset = l2_entry & L2E_OFFSET_MASK; 472 check_offset = (type == QCOW2_SUBCLUSTER_NORMAL || 473 type == QCOW2_SUBCLUSTER_ZERO_ALLOC || 474 type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC); 475 } else if (type != expected_type) { 476 break; 477 } else if (check_offset) { 478 expected_offset += s->cluster_size; 479 if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) { 480 break; 481 } 482 } 483 count += ret; 484 /* Stop if there are type changes before the end of the cluster */ 485 if (first_sc + ret < s->subclusters_per_cluster) { 486 break; 487 } 488 } 489 490 return count; 491 } 492 493 static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, 494 uint64_t src_cluster_offset, 495 unsigned offset_in_cluster, 496 QEMUIOVector *qiov) 497 { 498 int ret; 499 500 if (qiov->size == 0) { 501 return 0; 502 } 503 504 BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); 505 506 if (!bs->drv) { 507 return -ENOMEDIUM; 508 } 509 510 /* 511 * We never deal with requests that don't 
satisfy 512 * bdrv_check_qiov_request(), and aligning requests to clusters never 513 * breaks this condition. So, do some assertions before calling 514 * bs->drv->bdrv_co_preadv_part() which has int64_t arguments. 515 */ 516 assert(src_cluster_offset <= INT64_MAX); 517 assert(src_cluster_offset + offset_in_cluster <= INT64_MAX); 518 /* Cast qiov->size to uint64_t to silence a compiler warning on -m32 */ 519 assert((uint64_t)qiov->size <= INT64_MAX); 520 bdrv_check_qiov_request(src_cluster_offset + offset_in_cluster, qiov->size, 521 qiov, 0, &error_abort); 522 /* 523 * Call .bdrv_co_readv() directly instead of using the public block-layer 524 * interface. This avoids double I/O throttling and request tracking, 525 * which can lead to deadlock when block layer copy-on-read is enabled. 526 */ 527 ret = bs->drv->bdrv_co_preadv_part(bs, 528 src_cluster_offset + offset_in_cluster, 529 qiov->size, qiov, 0, 0); 530 if (ret < 0) { 531 return ret; 532 } 533 534 return 0; 535 } 536 537 static int coroutine_fn do_perform_cow_write(BlockDriverState *bs, 538 uint64_t cluster_offset, 539 unsigned offset_in_cluster, 540 QEMUIOVector *qiov) 541 { 542 BDRVQcow2State *s = bs->opaque; 543 int ret; 544 545 if (qiov->size == 0) { 546 return 0; 547 } 548 549 ret = qcow2_pre_write_overlap_check(bs, 0, 550 cluster_offset + offset_in_cluster, qiov->size, true); 551 if (ret < 0) { 552 return ret; 553 } 554 555 BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); 556 ret = bdrv_co_pwritev(s->data_file, cluster_offset + offset_in_cluster, 557 qiov->size, qiov, 0); 558 if (ret < 0) { 559 return ret; 560 } 561 562 return 0; 563 } 564 565 566 /* 567 * get_host_offset 568 * 569 * For a given offset of the virtual disk find the equivalent host 570 * offset in the qcow2 file and store it in *host_offset. Neither 571 * offset needs to be aligned to a cluster boundary. 572 * 573 * If the cluster is unallocated then *host_offset will be 0. 
574 * If the cluster is compressed then *host_offset will contain the l2 entry. 575 * 576 * On entry, *bytes is the maximum number of contiguous bytes starting at 577 * offset that we are interested in. 578 * 579 * On exit, *bytes is the number of bytes starting at offset that have the same 580 * subcluster type and (if applicable) are stored contiguously in the image 581 * file. The subcluster type is stored in *subcluster_type. 582 * Compressed clusters are always processed one by one. 583 * 584 * Returns 0 on success, -errno in error cases. 585 */ 586 int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset, 587 unsigned int *bytes, uint64_t *host_offset, 588 QCow2SubclusterType *subcluster_type) 589 { 590 BDRVQcow2State *s = bs->opaque; 591 unsigned int l2_index, sc_index; 592 uint64_t l1_index, l2_offset, *l2_slice, l2_entry, l2_bitmap; 593 int sc; 594 unsigned int offset_in_cluster; 595 uint64_t bytes_available, bytes_needed, nb_clusters; 596 QCow2SubclusterType type; 597 int ret; 598 599 offset_in_cluster = offset_into_cluster(s, offset); 600 bytes_needed = (uint64_t) *bytes + offset_in_cluster; 601 602 /* compute how many bytes there are between the start of the cluster 603 * containing offset and the end of the l2 slice that contains 604 * the entry pointing to it */ 605 bytes_available = 606 ((uint64_t) (s->l2_slice_size - offset_to_l2_slice_index(s, offset))) 607 << s->cluster_bits; 608 609 if (bytes_needed > bytes_available) { 610 bytes_needed = bytes_available; 611 } 612 613 *host_offset = 0; 614 615 /* seek to the l2 offset in the l1 table */ 616 617 l1_index = offset_to_l1_index(s, offset); 618 if (l1_index >= s->l1_size) { 619 type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN; 620 goto out; 621 } 622 623 l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; 624 if (!l2_offset) { 625 type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN; 626 goto out; 627 } 628 629 if (offset_into_cluster(s, l2_offset)) { 630 qcow2_signal_corruption(bs, true, -1, -1, "L2 table 
offset %#" PRIx64 631 " unaligned (L1 index: %#" PRIx64 ")", 632 l2_offset, l1_index); 633 return -EIO; 634 } 635 636 /* load the l2 slice in memory */ 637 638 ret = l2_load(bs, offset, l2_offset, &l2_slice); 639 if (ret < 0) { 640 return ret; 641 } 642 643 /* find the cluster offset for the given disk offset */ 644 645 l2_index = offset_to_l2_slice_index(s, offset); 646 sc_index = offset_to_sc_index(s, offset); 647 l2_entry = get_l2_entry(s, l2_slice, l2_index); 648 l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); 649 650 nb_clusters = size_to_clusters(s, bytes_needed); 651 /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned 652 * integers; the minimum cluster size is 512, so this assertion is always 653 * true */ 654 assert(nb_clusters <= INT_MAX); 655 656 type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index); 657 if (s->qcow_version < 3 && (type == QCOW2_SUBCLUSTER_ZERO_PLAIN || 658 type == QCOW2_SUBCLUSTER_ZERO_ALLOC)) { 659 qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found" 660 " in pre-v3 image (L2 offset: %#" PRIx64 661 ", L2 index: %#x)", l2_offset, l2_index); 662 ret = -EIO; 663 goto fail; 664 } 665 switch (type) { 666 case QCOW2_SUBCLUSTER_INVALID: 667 break; /* This is handled by count_contiguous_subclusters() below */ 668 case QCOW2_SUBCLUSTER_COMPRESSED: 669 if (has_data_file(bs)) { 670 qcow2_signal_corruption(bs, true, -1, -1, "Compressed cluster " 671 "entry found in image with external data " 672 "file (L2 offset: %#" PRIx64 ", L2 index: " 673 "%#x)", l2_offset, l2_index); 674 ret = -EIO; 675 goto fail; 676 } 677 *host_offset = l2_entry; 678 break; 679 case QCOW2_SUBCLUSTER_ZERO_PLAIN: 680 case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN: 681 break; 682 case QCOW2_SUBCLUSTER_ZERO_ALLOC: 683 case QCOW2_SUBCLUSTER_NORMAL: 684 case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: { 685 uint64_t host_cluster_offset = l2_entry & L2E_OFFSET_MASK; 686 *host_offset = host_cluster_offset + offset_in_cluster; 687 if 
(offset_into_cluster(s, host_cluster_offset)) { 688 qcow2_signal_corruption(bs, true, -1, -1, 689 "Cluster allocation offset %#" 690 PRIx64 " unaligned (L2 offset: %#" PRIx64 691 ", L2 index: %#x)", host_cluster_offset, 692 l2_offset, l2_index); 693 ret = -EIO; 694 goto fail; 695 } 696 if (has_data_file(bs) && *host_offset != offset) { 697 qcow2_signal_corruption(bs, true, -1, -1, 698 "External data file host cluster offset %#" 699 PRIx64 " does not match guest cluster " 700 "offset: %#" PRIx64 701 ", L2 index: %#x)", host_cluster_offset, 702 offset - offset_in_cluster, l2_index); 703 ret = -EIO; 704 goto fail; 705 } 706 break; 707 } 708 default: 709 abort(); 710 } 711 712 sc = count_contiguous_subclusters(bs, nb_clusters, sc_index, 713 l2_slice, &l2_index); 714 if (sc < 0) { 715 qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster entry found " 716 " (L2 offset: %#" PRIx64 ", L2 index: %#x)", 717 l2_offset, l2_index); 718 ret = -EIO; 719 goto fail; 720 } 721 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 722 723 bytes_available = ((int64_t)sc + sc_index) << s->subcluster_bits; 724 725 out: 726 if (bytes_available > bytes_needed) { 727 bytes_available = bytes_needed; 728 } 729 730 /* bytes_available <= bytes_needed <= *bytes + offset_in_cluster; 731 * subtracting offset_in_cluster will therefore definitely yield something 732 * not exceeding UINT_MAX */ 733 assert(bytes_available - offset_in_cluster <= UINT_MAX); 734 *bytes = bytes_available - offset_in_cluster; 735 736 *subcluster_type = type; 737 738 return 0; 739 740 fail: 741 qcow2_cache_put(s->l2_table_cache, (void **)&l2_slice); 742 return ret; 743 } 744 745 /* 746 * get_cluster_table 747 * 748 * for a given disk offset, load (and allocate if needed) 749 * the appropriate slice of its l2 table. 750 * 751 * the cluster index in the l2 slice is given to the caller. 
752 * 753 * Returns 0 on success, -errno in failure case 754 */ 755 static int get_cluster_table(BlockDriverState *bs, uint64_t offset, 756 uint64_t **new_l2_slice, 757 int *new_l2_index) 758 { 759 BDRVQcow2State *s = bs->opaque; 760 unsigned int l2_index; 761 uint64_t l1_index, l2_offset; 762 uint64_t *l2_slice = NULL; 763 int ret; 764 765 /* seek to the l2 offset in the l1 table */ 766 767 l1_index = offset_to_l1_index(s, offset); 768 if (l1_index >= s->l1_size) { 769 ret = qcow2_grow_l1_table(bs, l1_index + 1, false); 770 if (ret < 0) { 771 return ret; 772 } 773 } 774 775 assert(l1_index < s->l1_size); 776 l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; 777 if (offset_into_cluster(s, l2_offset)) { 778 qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64 779 " unaligned (L1 index: %#" PRIx64 ")", 780 l2_offset, l1_index); 781 return -EIO; 782 } 783 784 if (!(s->l1_table[l1_index] & QCOW_OFLAG_COPIED)) { 785 /* First allocate a new L2 table (and do COW if needed) */ 786 ret = l2_allocate(bs, l1_index); 787 if (ret < 0) { 788 return ret; 789 } 790 791 /* Then decrease the refcount of the old table */ 792 if (l2_offset) { 793 qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s), 794 QCOW2_DISCARD_OTHER); 795 } 796 797 /* Get the offset of the newly-allocated l2 table */ 798 l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; 799 assert(offset_into_cluster(s, l2_offset) == 0); 800 } 801 802 /* load the l2 slice in memory */ 803 ret = l2_load(bs, offset, l2_offset, &l2_slice); 804 if (ret < 0) { 805 return ret; 806 } 807 808 /* find the cluster offset for the given disk offset */ 809 810 l2_index = offset_to_l2_slice_index(s, offset); 811 812 *new_l2_slice = l2_slice; 813 *new_l2_index = l2_index; 814 815 return 0; 816 } 817 818 /* 819 * alloc_compressed_cluster_offset 820 * 821 * For a given offset on the virtual disk, allocate a new compressed cluster 822 * and put the host offset of the cluster into *host_offset. 
If a cluster is 823 * already allocated at the offset, return an error. 824 * 825 * Return 0 on success and -errno in error cases 826 */ 827 int coroutine_fn qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, 828 uint64_t offset, 829 int compressed_size, 830 uint64_t *host_offset) 831 { 832 BDRVQcow2State *s = bs->opaque; 833 int l2_index, ret; 834 uint64_t *l2_slice; 835 int64_t cluster_offset; 836 int nb_csectors; 837 838 if (has_data_file(bs)) { 839 return 0; 840 } 841 842 ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); 843 if (ret < 0) { 844 return ret; 845 } 846 847 /* Compression can't overwrite anything. Fail if the cluster was already 848 * allocated. */ 849 cluster_offset = get_l2_entry(s, l2_slice, l2_index); 850 if (cluster_offset & L2E_OFFSET_MASK) { 851 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 852 return -EIO; 853 } 854 855 cluster_offset = qcow2_alloc_bytes(bs, compressed_size); 856 if (cluster_offset < 0) { 857 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 858 return cluster_offset; 859 } 860 861 nb_csectors = 862 (cluster_offset + compressed_size - 1) / QCOW2_COMPRESSED_SECTOR_SIZE - 863 (cluster_offset / QCOW2_COMPRESSED_SECTOR_SIZE); 864 865 /* The offset and size must fit in their fields of the L2 table entry */ 866 assert((cluster_offset & s->cluster_offset_mask) == cluster_offset); 867 assert((nb_csectors & s->csize_mask) == nb_csectors); 868 869 cluster_offset |= QCOW_OFLAG_COMPRESSED | 870 ((uint64_t)nb_csectors << s->csize_shift); 871 872 /* update L2 table */ 873 874 /* compressed clusters never have the copied flag */ 875 876 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED); 877 qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 878 set_l2_entry(s, l2_slice, l2_index, cluster_offset); 879 if (has_subclusters(s)) { 880 set_l2_bitmap(s, l2_slice, l2_index, 0); 881 } 882 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 883 884 *host_offset = cluster_offset & 
s->cluster_offset_mask; 885 return 0; 886 } 887 888 static int coroutine_fn perform_cow(BlockDriverState *bs, QCowL2Meta *m) 889 { 890 BDRVQcow2State *s = bs->opaque; 891 Qcow2COWRegion *start = &m->cow_start; 892 Qcow2COWRegion *end = &m->cow_end; 893 unsigned buffer_size; 894 unsigned data_bytes = end->offset - (start->offset + start->nb_bytes); 895 bool merge_reads; 896 uint8_t *start_buffer, *end_buffer; 897 QEMUIOVector qiov; 898 int ret; 899 900 assert(start->nb_bytes <= UINT_MAX - end->nb_bytes); 901 assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes); 902 assert(start->offset + start->nb_bytes <= end->offset); 903 904 if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) { 905 return 0; 906 } 907 908 /* If we have to read both the start and end COW regions and the 909 * middle region is not too large then perform just one read 910 * operation */ 911 merge_reads = start->nb_bytes && end->nb_bytes && data_bytes <= 16384; 912 if (merge_reads) { 913 buffer_size = start->nb_bytes + data_bytes + end->nb_bytes; 914 } else { 915 /* If we have to do two reads, add some padding in the middle 916 * if necessary to make sure that the end region is optimally 917 * aligned. */ 918 size_t align = bdrv_opt_mem_align(bs); 919 assert(align > 0 && align <= UINT_MAX); 920 assert(QEMU_ALIGN_UP(start->nb_bytes, align) <= 921 UINT_MAX - end->nb_bytes); 922 buffer_size = QEMU_ALIGN_UP(start->nb_bytes, align) + end->nb_bytes; 923 } 924 925 /* Reserve a buffer large enough to store all the data that we're 926 * going to read */ 927 start_buffer = qemu_try_blockalign(bs, buffer_size); 928 if (start_buffer == NULL) { 929 return -ENOMEM; 930 } 931 /* The part of the buffer where the end region is located */ 932 end_buffer = start_buffer + buffer_size - end->nb_bytes; 933 934 qemu_iovec_init(&qiov, 2 + (m->data_qiov ? 
935 qemu_iovec_subvec_niov(m->data_qiov, 936 m->data_qiov_offset, 937 data_bytes) 938 : 0)); 939 940 qemu_co_mutex_unlock(&s->lock); 941 /* First we read the existing data from both COW regions. We 942 * either read the whole region in one go, or the start and end 943 * regions separately. */ 944 if (merge_reads) { 945 qemu_iovec_add(&qiov, start_buffer, buffer_size); 946 ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov); 947 } else { 948 qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); 949 ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov); 950 if (ret < 0) { 951 goto fail; 952 } 953 954 qemu_iovec_reset(&qiov); 955 qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); 956 ret = do_perform_cow_read(bs, m->offset, end->offset, &qiov); 957 } 958 if (ret < 0) { 959 goto fail; 960 } 961 962 /* Encrypt the data if necessary before writing it */ 963 if (bs->encrypted) { 964 ret = qcow2_co_encrypt(bs, 965 m->alloc_offset + start->offset, 966 m->offset + start->offset, 967 start_buffer, start->nb_bytes); 968 if (ret < 0) { 969 goto fail; 970 } 971 972 ret = qcow2_co_encrypt(bs, 973 m->alloc_offset + end->offset, 974 m->offset + end->offset, 975 end_buffer, end->nb_bytes); 976 if (ret < 0) { 977 goto fail; 978 } 979 } 980 981 /* And now we can write everything. 
If we have the guest data we 982 * can write everything in one single operation */ 983 if (m->data_qiov) { 984 qemu_iovec_reset(&qiov); 985 if (start->nb_bytes) { 986 qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); 987 } 988 qemu_iovec_concat(&qiov, m->data_qiov, m->data_qiov_offset, data_bytes); 989 if (end->nb_bytes) { 990 qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); 991 } 992 /* NOTE: we have a write_aio blkdebug event here followed by 993 * a cow_write one in do_perform_cow_write(), but there's only 994 * one single I/O operation */ 995 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); 996 ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov); 997 } else { 998 /* If there's no guest data then write both COW regions separately */ 999 qemu_iovec_reset(&qiov); 1000 qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); 1001 ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov); 1002 if (ret < 0) { 1003 goto fail; 1004 } 1005 1006 qemu_iovec_reset(&qiov); 1007 qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); 1008 ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov); 1009 } 1010 1011 fail: 1012 qemu_co_mutex_lock(&s->lock); 1013 1014 /* 1015 * Before we update the L2 table to actually point to the new cluster, we 1016 * need to be sure that the refcounts have been increased and COW was 1017 * handled. 
/*
 * Completes the cluster allocation described by @m: performs the pending
 * copy-on-write and then makes the L2 entries point to the new clusters.
 *
 * For each of the m->nb_clusters clusters starting at guest offset
 * m->offset, the L2 entry is set to the corresponding host cluster
 * (counting up from m->alloc_offset) with QCOW_OFLAG_COPIED set. If the
 * image has subclusters and m->prealloc is not set, the L2 bitmap is
 * updated as well: the subclusters between the start of m->cow_start and
 * the end of m->cow_end are marked as allocated and their zero bits are
 * cleared.
 *
 * Non-zero L2 entries that get replaced are released afterwards with
 * qcow2_free_any_cluster(), unless m->keep_old_clusters is set.
 *
 * Returns 0 on success, -ENOMEM if the list of old entries cannot be
 * allocated, or the negative value returned by perform_cow() or
 * get_cluster_table().
 */
int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
                                             QCowL2Meta *m)
{
    BDRVQcow2State *s = bs->opaque;
    int i, j = 0, l2_index, ret;
    uint64_t *old_cluster, *l2_slice;
    uint64_t cluster_offset = m->alloc_offset;

    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
    assert(m->nb_clusters > 0);

    /* Scratch list of the L2 entries that are about to be replaced */
    old_cluster = g_try_new(uint64_t, m->nb_clusters);
    if (old_cluster == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    /* Copy the content of the unmodified sectors */
    ret = perform_cow(bs, m);
    if (ret < 0) {
        goto err;
    }

    /* Update L2 table. */
    if (s->use_lazy_refcounts) {
        qcow2_mark_dirty(bs);
    }
    if (qcow2_need_accurate_refcounts(s)) {
        qcow2_cache_set_dependency(bs, s->l2_table_cache,
                                   s->refcount_block_cache);
    }

    ret = get_cluster_table(bs, m->offset, &l2_slice, &l2_index);
    if (ret < 0) {
        goto err;
    }
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);

    /* All affected entries must live in the L2 slice we just loaded */
    assert(l2_index + m->nb_clusters <= s->l2_slice_size);
    /* The end of the tail COW region must lie within the allocated clusters */
    assert(m->cow_end.offset + m->cow_end.nb_bytes <=
           m->nb_clusters << s->cluster_bits);
    for (i = 0; i < m->nb_clusters; i++) {
        uint64_t offset = cluster_offset + ((uint64_t)i << s->cluster_bits);
        /*
         * If two concurrent writes happen to the same unallocated cluster,
         * each write allocates a separate cluster and writes its data
         * concurrently. The first one to complete updates the L2 table with
         * a pointer to its cluster; the second one has to do RMW (which is
         * done above by perform_cow()), update the L2 table with its own
         * cluster pointer and free the old cluster. This is what this loop
         * does.
         */
        if (get_l2_entry(s, l2_slice, l2_index + i) != 0) {
            old_cluster[j++] = get_l2_entry(s, l2_slice, l2_index + i);
        }

        /* The offset must fit in the offset field of the L2 table entry */
        assert((offset & L2E_OFFSET_MASK) == offset);

        set_l2_entry(s, l2_slice, l2_index + i, offset | QCOW_OFLAG_COPIED);

        /* Update bitmap with the subclusters that were just written */
        if (has_subclusters(s) && !m->prealloc) {
            uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
            unsigned written_from = m->cow_start.offset;
            unsigned written_to = m->cow_end.offset + m->cow_end.nb_bytes;
            int first_sc, last_sc;
            /* Narrow written_from and written_to down to the current cluster */
            written_from = MAX(written_from, i << s->cluster_bits);
            written_to = MIN(written_to, (i + 1) << s->cluster_bits);
            assert(written_from < written_to);
            first_sc = offset_to_sc_index(s, written_from);
            last_sc = offset_to_sc_index(s, written_to - 1);
            /* Mark [first_sc, last_sc] as allocated and no longer zero */
            l2_bitmap |= QCOW_OFLAG_SUB_ALLOC_RANGE(first_sc, last_sc + 1);
            l2_bitmap &= ~QCOW_OFLAG_SUB_ZERO_RANGE(first_sc, last_sc + 1);
            set_l2_bitmap(s, l2_slice, l2_index + i, l2_bitmap);
        }
    }


    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    /*
     * If this was a COW, we need to decrease the refcount of the old cluster.
     *
     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
     * clusters), the next write will reuse them anyway.
     */
    if (!m->keep_old_clusters && j != 0) {
        for (i = 0; i < j; i++) {
            qcow2_free_any_cluster(bs, old_cluster[i], QCOW2_DISCARD_NEVER);
        }
    }

    ret = 0;
err:
    /* old_cluster may still be NULL here; g_free() accepts that */
    g_free(old_cluster);
    return ret;
}
1128 */ 1129 void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) 1130 { 1131 BDRVQcow2State *s = bs->opaque; 1132 if (!has_data_file(bs) && !m->keep_old_clusters) { 1133 qcow2_free_clusters(bs, m->alloc_offset, 1134 m->nb_clusters << s->cluster_bits, 1135 QCOW2_DISCARD_NEVER); 1136 } 1137 } 1138 1139 /* 1140 * For a given write request, create a new QCowL2Meta structure, add 1141 * it to @m and the BDRVQcow2State.cluster_allocs list. If the write 1142 * request does not need copy-on-write or changes to the L2 metadata 1143 * then this function does nothing. 1144 * 1145 * @host_cluster_offset points to the beginning of the first cluster. 1146 * 1147 * @guest_offset and @bytes indicate the offset and length of the 1148 * request. 1149 * 1150 * @l2_slice contains the L2 entries of all clusters involved in this 1151 * write request. 1152 * 1153 * If @keep_old is true it means that the clusters were already 1154 * allocated and will be overwritten. If false then the clusters are 1155 * new and we have to decrease the reference count of the old ones. 1156 * 1157 * Returns 0 on success, -errno on failure. 
/*
 * For a given write request, create a new QCowL2Meta structure, add
 * it to @m and the BDRVQcow2State.cluster_allocs list. If the write
 * request does not need copy-on-write or changes to the L2 metadata
 * then this function does nothing.
 *
 * @host_cluster_offset points to the beginning of the first cluster.
 *
 * @guest_offset and @bytes indicate the offset and length of the
 * request.
 *
 * @l2_slice contains the L2 entries of all clusters involved in this
 * write request.
 *
 * If @keep_old is true it means that the clusters were already
 * allocated and will be overwritten. If false then the clusters are
 * new and we have to decrease the reference count of the old ones.
 *
 * On success the new QCowL2Meta becomes the head of the list in @m (its
 * .next points to the previous *m) and describes two COW regions, both
 * relative to the start of the first cluster:
 *   cow_start = [cow_start_from, cow_start_to)
 *   cow_end   = [cow_end_from, cow_end_to)
 *
 * Returns 0 on success, -EIO if an invalid L2 entry is found (the image
 * is then reported as corrupted with qcow2_signal_corruption()).
 */
static int calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset,
                             uint64_t guest_offset, unsigned bytes,
                             uint64_t *l2_slice, QCowL2Meta **m, bool keep_old)
{
    BDRVQcow2State *s = bs->opaque;
    int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset);
    uint64_t l2_entry, l2_bitmap;
    unsigned cow_start_from, cow_end_to;
    /* The guest data occupies [cow_start_to, cow_end_from) */
    unsigned cow_start_to = offset_into_cluster(s, guest_offset);
    unsigned cow_end_from = cow_start_to + bytes;
    unsigned nb_clusters = size_to_clusters(s, cow_end_from);
    QCowL2Meta *old_m = *m;
    QCow2SubclusterType type;
    int i;
    /* Skipping COW is only considered when the clusters are kept */
    bool skip_cow = keep_old;

    assert(nb_clusters <= s->l2_slice_size - l2_index);

    /* Check the type of all affected subclusters */
    for (i = 0; i < nb_clusters; i++) {
        l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
        l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
        if (skip_cow) {
            /* Part of the request that falls into cluster number i */
            unsigned write_from = MAX(cow_start_to, i << s->cluster_bits);
            unsigned write_to = MIN(cow_end_from, (i + 1) << s->cluster_bits);
            int first_sc = offset_to_sc_index(s, write_from);
            int last_sc = offset_to_sc_index(s, write_to - 1);
            int cnt = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap,
                                                      first_sc, &type);
            /* Is any of the subclusters of type != QCOW2_SUBCLUSTER_NORMAL ? */
            if (type != QCOW2_SUBCLUSTER_NORMAL || first_sc + cnt <= last_sc) {
                skip_cow = false;
            }
        } else {
            /* If we can't skip the cow we can still look for invalid entries */
            type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, 0);
        }
        if (type == QCOW2_SUBCLUSTER_INVALID) {
            int l1_index = offset_to_l1_index(s, guest_offset);
            uint64_t l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
            qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster "
                                    "entry found (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)",
                                    l2_offset, l2_index + i);
            return -EIO;
        }
    }

    /* Every written subcluster is already a normal one: nothing to do */
    if (skip_cow) {
        return 0;
    }

    /* Get the L2 entry of the first cluster */
    l2_entry = get_l2_entry(s, l2_slice, l2_index);
    l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
    sc_index = offset_to_sc_index(s, guest_offset);
    type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);

    /* Decide where the head COW region starts */
    if (!keep_old) {
        switch (type) {
        case QCOW2_SUBCLUSTER_COMPRESSED:
            /* COW starts at the beginning of the cluster */
            cow_start_from = 0;
            break;
        case QCOW2_SUBCLUSTER_NORMAL:
        case QCOW2_SUBCLUSTER_ZERO_ALLOC:
        case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
            if (has_subclusters(s)) {
                /* Skip all leading zero and unallocated subclusters */
                uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC;
                cow_start_from =
                    MIN(sc_index, ctz32(alloc_bitmap)) << s->subcluster_bits;
            } else {
                cow_start_from = 0;
            }
            break;
        case QCOW2_SUBCLUSTER_ZERO_PLAIN:
        case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
            /* COW starts at the subcluster that contains guest_offset */
            cow_start_from = sc_index << s->subcluster_bits;
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        switch (type) {
        case QCOW2_SUBCLUSTER_NORMAL:
            /* Empty head COW region */
            cow_start_from = cow_start_to;
            break;
        case QCOW2_SUBCLUSTER_ZERO_ALLOC:
        case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
            cow_start_from = sc_index << s->subcluster_bits;
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Get the L2 entry of the last cluster */
    l2_index += nb_clusters - 1;
    l2_entry = get_l2_entry(s, l2_slice, l2_index);
    l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
    sc_index = offset_to_sc_index(s, guest_offset + bytes - 1);
    type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);

    /* Decide where the tail COW region ends */
    if (!keep_old) {
        switch (type) {
        case QCOW2_SUBCLUSTER_COMPRESSED:
            /* COW runs up to the end of the cluster */
            cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);
            break;
        case QCOW2_SUBCLUSTER_NORMAL:
        case QCOW2_SUBCLUSTER_ZERO_ALLOC:
        case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
            cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);
            if (has_subclusters(s)) {
                /* Skip all trailing zero and unallocated subclusters */
                uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC;
                cow_end_to -=
                    MIN(s->subclusters_per_cluster - sc_index - 1,
                        clz32(alloc_bitmap)) << s->subcluster_bits;
            }
            break;
        case QCOW2_SUBCLUSTER_ZERO_PLAIN:
        case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
            /* COW runs up to the end of the last written subcluster */
            cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        switch (type) {
        case QCOW2_SUBCLUSTER_NORMAL:
            /* Empty tail COW region */
            cow_end_to = cow_end_from;
            break;
        case QCOW2_SUBCLUSTER_ZERO_ALLOC:
        case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
            cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Prepend the new entry to the caller's list */
    *m = g_malloc0(sizeof(**m));
    **m = (QCowL2Meta) {
        .next           = old_m,

        .alloc_offset   = host_cluster_offset,
        .offset         = start_of_cluster(s, guest_offset),
        .nb_clusters    = nb_clusters,

        .keep_old_clusters = keep_old,

        .cow_start = {
            .offset     = cow_start_from,
            .nb_bytes   = cow_start_to - cow_start_from,
        },
        .cow_end = {
            .offset     = cow_end_from,
            .nb_bytes   = cow_end_to - cow_end_from,
        },
    };

    /* Register the allocation as in flight */
    qemu_co_queue_init(&(*m)->dependent_requests);
    QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);

    return 0;
}

/*
 * Returns true if writing to the cluster pointed to by @l2_entry
 * requires a new allocation (that is, if the cluster is unallocated
 * or has refcount > 1 and therefore cannot be written in-place).
 *
 * Concretely: normal and preallocated-zero clusters with
 * QCOW_OFLAG_COPIED set return false; the same types without the flag,
 * as well as unallocated, compressed and plain-zero clusters, return
 * true. Any other cluster type aborts.
 */
static bool cluster_needs_new_alloc(BlockDriverState *bs, uint64_t l2_entry)
{
    switch (qcow2_get_cluster_type(bs, l2_entry)) {
    case QCOW2_CLUSTER_NORMAL:
    case QCOW2_CLUSTER_ZERO_ALLOC:
        if (l2_entry & QCOW_OFLAG_COPIED) {
            return false;
        }
        /* fallthrough */
    case QCOW2_CLUSTER_UNALLOCATED:
    case QCOW2_CLUSTER_COMPRESSED:
    case QCOW2_CLUSTER_ZERO_PLAIN:
        return true;
    default:
        abort();
    }
}
1361 */ 1362 static int count_single_write_clusters(BlockDriverState *bs, int nb_clusters, 1363 uint64_t *l2_slice, int l2_index, 1364 bool new_alloc) 1365 { 1366 BDRVQcow2State *s = bs->opaque; 1367 uint64_t l2_entry = get_l2_entry(s, l2_slice, l2_index); 1368 uint64_t expected_offset = l2_entry & L2E_OFFSET_MASK; 1369 int i; 1370 1371 for (i = 0; i < nb_clusters; i++) { 1372 l2_entry = get_l2_entry(s, l2_slice, l2_index + i); 1373 if (cluster_needs_new_alloc(bs, l2_entry) != new_alloc) { 1374 break; 1375 } 1376 if (!new_alloc) { 1377 if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) { 1378 break; 1379 } 1380 expected_offset += s->cluster_size; 1381 } 1382 } 1383 1384 assert(i <= nb_clusters); 1385 return i; 1386 } 1387 1388 /* 1389 * Check if there already is an AIO write request in flight which allocates 1390 * the same cluster. In this case we need to wait until the previous 1391 * request has completed and updated the L2 table accordingly. 1392 * 1393 * Returns: 1394 * 0 if there was no dependency. *cur_bytes indicates the number of 1395 * bytes from guest_offset that can be read before the next 1396 * dependency must be processed (or the request is complete) 1397 * 1398 * -EAGAIN if we had to wait for another request, previously gathered 1399 * information on cluster allocation may be invalid now. The caller 1400 * must start over anyway, so consider *cur_bytes undefined. 
/*
 * Check if there already is an AIO write request in flight which allocates
 * the same cluster. In this case we need to wait until the previous
 * request has completed and updated the L2 table accordingly.
 *
 * Returns:
 *   0       if there was no dependency. *cur_bytes indicates the number of
 *           bytes from guest_offset that can be processed before the next
 *           dependency must be handled (or the request is complete).
 *           *cur_bytes is set to 0 if the request conflicts right at
 *           guest_offset and *m already contains an allocation.
 *
 *   -EAGAIN if we had to wait for another request, previously gathered
 *           information on cluster allocation may be invalid now. The caller
 *           must start over anyway, so consider *cur_bytes undefined.
 */
static int coroutine_fn handle_dependencies(BlockDriverState *bs,
                                            uint64_t guest_offset,
                                            uint64_t *cur_bytes, QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    QCowL2Meta *old_alloc;
    uint64_t bytes = *cur_bytes;

    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {

        /* [start, end) is our request; [old_start, old_end) is the
         * cluster-aligned area touched by the in-flight allocation */
        uint64_t start = guest_offset;
        uint64_t end = start + bytes;
        uint64_t old_start = start_of_cluster(s, l2meta_cow_start(old_alloc));
        uint64_t old_end = ROUND_UP(l2meta_cow_end(old_alloc), s->cluster_size);

        if (end <= old_start || start >= old_end) {
            /* No intersection */
            continue;
        }

        if (old_alloc->keep_old_clusters &&
            (end <= l2meta_cow_start(old_alloc) ||
             start >= l2meta_cow_end(old_alloc)))
        {
            /*
             * Clusters intersect but COW areas don't. And cluster itself is
             * already allocated. So, there is no actual conflict.
             */
            continue;
        }

        /* Conflict */

        if (start < old_start) {
            /* Stop at the start of a running allocation */
            bytes = old_start - start;
        } else {
            bytes = 0;
        }

        /*
         * Stop if an l2meta already exists. After yielding, it wouldn't
         * be valid any more, so we'd have to clean up the old L2Metas
         * and deal with requests depending on them before starting to
         * gather new ones. Not worth the trouble.
         */
        if (bytes == 0 && *m) {
            *cur_bytes = 0;
            return 0;
        }

        if (bytes == 0) {
            /*
             * Wait for the dependency to complete. We need to recheck
             * the free/allocated clusters when we continue.
             */
            qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
            return -EAGAIN;
        }
    }

    /* Make sure that existing clusters and new allocations are only used up to
     * the next dependency if we shortened the request above */
    *cur_bytes = bytes;

    return 0;
}

/*
 * Checks how many already allocated clusters that don't require a new
 * allocation there are at the given guest_offset (up to *bytes).
 * If *host_offset is not INV_OFFSET, only physically contiguous clusters
 * beginning at this host offset are counted.
 *
 * Note that guest_offset may not be cluster aligned. In this case, the
 * returned *host_offset points to the exact byte referenced by guest_offset
 * and therefore isn't cluster aligned either.
 *
 * Returns:
 *   0:     if no allocated clusters are available at the given offset.
 *          *bytes is normally unchanged. It is set to 0 if the cluster
 *          is allocated and can be overwritten in-place but doesn't have
 *          the right physical offset.
 *
 *   1:     if allocated clusters that can be overwritten in place are
 *          available at the requested offset. *bytes may have decreased
 *          and describes the length of the area that can be written to.
 *
 *  -errno: in error cases (-EIO if the L2 entry holds an unaligned
 *          cluster offset; the image is then reported as corrupted)
 */
static int coroutine_fn handle_copied(BlockDriverState *bs,
    uint64_t guest_offset, uint64_t *host_offset, uint64_t *bytes,
    QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index;
    uint64_t l2_entry, cluster_offset;
    uint64_t *l2_slice;
    uint64_t nb_clusters;
    unsigned int keep_clusters;
    int ret;

    trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
                              *bytes);

    /* A requested host offset must have the same in-cluster offset */
    assert(*host_offset == INV_OFFSET || offset_into_cluster(s, guest_offset)
                                      == offset_into_cluster(s, *host_offset));

    /*
     * Calculate the number of clusters to look for. We stop at L2 slice
     * boundaries to keep things simple.
     */
    nb_clusters =
        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);

    l2_index = offset_to_l2_slice_index(s, guest_offset);
    nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
    /* Limit total byte count to BDRV_REQUEST_MAX_BYTES */
    nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits);

    /* Find L2 entry for the first involved cluster */
    ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
    if (ret < 0) {
        return ret;
    }

    l2_entry = get_l2_entry(s, l2_slice, l2_index);
    cluster_offset = l2_entry & L2E_OFFSET_MASK;

    if (!cluster_needs_new_alloc(bs, l2_entry)) {
        if (offset_into_cluster(s, cluster_offset)) {
            qcow2_signal_corruption(bs, true, -1, -1, "%s cluster offset "
                                    "%#" PRIx64 " unaligned (guest offset: %#"
                                    PRIx64 ")", l2_entry & QCOW_OFLAG_ZERO ?
                                    "Preallocated zero" : "Data",
                                    cluster_offset, guest_offset);
            ret = -EIO;
            goto out;
        }

        /* If a specific host_offset is required, check it */
        if (*host_offset != INV_OFFSET && cluster_offset != *host_offset) {
            *bytes = 0;
            ret = 0;
            goto out;
        }

        /* We keep all QCOW_OFLAG_COPIED clusters */
        keep_clusters = count_single_write_clusters(bs, nb_clusters, l2_slice,
                                                    l2_index, false);
        assert(keep_clusters <= nb_clusters);

        /* Shorten the request to the end of the last cluster we keep */
        *bytes = MIN(*bytes,
                 keep_clusters * s->cluster_size
                 - offset_into_cluster(s, guest_offset));
        assert(*bytes != 0);

        ret = calculate_l2_meta(bs, cluster_offset, guest_offset,
                                *bytes, l2_slice, m, true);
        if (ret < 0) {
            goto out;
        }

        ret = 1;
    } else {
        ret = 0;
    }

    /* Cleanup */
out:
    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    /* Only return a host offset if we actually made progress. Otherwise we
     * would make requirements for handle_alloc() that it can't fulfill */
    if (ret > 0) {
        *host_offset = cluster_offset + offset_into_cluster(s, guest_offset);
    }

    return ret;
}
1601 */ 1602 static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, 1603 uint64_t *host_offset, uint64_t *nb_clusters) 1604 { 1605 BDRVQcow2State *s = bs->opaque; 1606 1607 trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, 1608 *host_offset, *nb_clusters); 1609 1610 if (has_data_file(bs)) { 1611 assert(*host_offset == INV_OFFSET || 1612 *host_offset == start_of_cluster(s, guest_offset)); 1613 *host_offset = start_of_cluster(s, guest_offset); 1614 return 0; 1615 } 1616 1617 /* Allocate new clusters */ 1618 trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); 1619 if (*host_offset == INV_OFFSET) { 1620 int64_t cluster_offset = 1621 qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size); 1622 if (cluster_offset < 0) { 1623 return cluster_offset; 1624 } 1625 *host_offset = cluster_offset; 1626 return 0; 1627 } else { 1628 int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); 1629 if (ret < 0) { 1630 return ret; 1631 } 1632 *nb_clusters = ret; 1633 return 0; 1634 } 1635 } 1636 1637 /* 1638 * Allocates new clusters for an area that is either still unallocated or 1639 * cannot be overwritten in-place. If *host_offset is not INV_OFFSET, 1640 * clusters are only allocated if the new allocation can match the specified 1641 * host offset. 1642 * 1643 * Note that guest_offset may not be cluster aligned. In this case, the 1644 * returned *host_offset points to exact byte referenced by guest_offset and 1645 * therefore isn't cluster aligned as well. 1646 * 1647 * Returns: 1648 * 0: if no clusters could be allocated. *bytes is set to 0, 1649 * *host_offset is left unchanged. 1650 * 1651 * 1: if new clusters were allocated. *bytes may be decreased if the 1652 * new allocation doesn't cover all of the requested area. 1653 * *host_offset is updated to contain the host offset of the first 1654 * newly allocated cluster. 
/*
 * Allocates new clusters for an area that is either still unallocated or
 * cannot be overwritten in-place. If *host_offset is not INV_OFFSET,
 * clusters are only allocated if the new allocation can match the specified
 * host offset.
 *
 * Note that guest_offset may not be cluster aligned. In this case, the
 * returned *host_offset points to the exact byte referenced by guest_offset
 * and therefore isn't cluster aligned either.
 *
 * Returns:
 *   0:     if no clusters could be allocated. *bytes is set to 0,
 *          *host_offset is left unchanged.
 *
 *   1:     if new clusters were allocated. *bytes may be decreased if the
 *          new allocation doesn't cover all of the requested area.
 *          *host_offset is updated to contain the host offset of the first
 *          newly allocated cluster.
 *
 *  -errno: in error cases
 */
static int coroutine_fn handle_alloc(BlockDriverState *bs,
    uint64_t guest_offset, uint64_t *host_offset, uint64_t *bytes,
    QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index;
    uint64_t *l2_slice;
    uint64_t nb_clusters;
    int ret;

    uint64_t alloc_cluster_offset;

    trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
                             *bytes);
    assert(*bytes > 0);

    /*
     * Calculate the number of clusters to look for. We stop at L2 slice
     * boundaries to keep things simple.
     */
    nb_clusters =
        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);

    l2_index = offset_to_l2_slice_index(s, guest_offset);
    nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
    /* Limit total allocation byte count to BDRV_REQUEST_MAX_BYTES */
    nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits);

    /* Find L2 entry for the first involved cluster */
    ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
    if (ret < 0) {
        return ret;
    }

    /* Only clusters that need a new allocation are of interest here */
    nb_clusters = count_single_write_clusters(bs, nb_clusters,
                                              l2_slice, l2_index, true);

    /* This function is only called when there were no non-COW clusters, so if
     * we can't find any unallocated or COW clusters either, something is
     * wrong with our code. */
    assert(nb_clusters > 0);

    /* Allocate at a given offset in the image file */
    alloc_cluster_offset = *host_offset == INV_OFFSET ? INV_OFFSET :
        start_of_cluster(s, *host_offset);
    ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
                                  &nb_clusters);
    if (ret < 0) {
        goto out;
    }

    /* Can't extend contiguous allocation */
    if (nb_clusters == 0) {
        *bytes = 0;
        ret = 0;
        goto out;
    }

    assert(alloc_cluster_offset != INV_OFFSET);

    /*
     * Save info needed for meta data update.
     *
     * requested_bytes: Number of bytes from the start of the first
     * newly allocated cluster to the end of the (possibly shortened
     * before) write request.
     *
     * avail_bytes: Number of bytes from the start of the first
     * newly allocated cluster to the end of the last newly allocated
     * cluster.
     *
     * nb_bytes: The number of bytes from the start of the first
     * newly allocated cluster to the end of the area that the write
     * request actually writes to (excluding COW at the end)
     */
    uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset);
    int avail_bytes = nb_clusters << s->cluster_bits;
    int nb_bytes = MIN(requested_bytes, avail_bytes);

    *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
    *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset));
    assert(*bytes != 0);

    ret = calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes,
                            l2_slice, m, false);
    if (ret < 0) {
        goto out;
    }

    ret = 1;

out:
    /* Release the L2 slice on every path after get_cluster_table() */
    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
    return ret;
}
/*
 * For a given area on the virtual disk defined by @offset and @bytes,
 * find the corresponding area on the qcow2 image, allocating new
 * clusters (or subclusters) if necessary. The result can span a
 * combination of allocated and previously unallocated clusters.
 *
 * Note that offset may not be cluster aligned. In this case, the returned
 * *host_offset points to the exact byte referenced by offset and therefore
 * isn't cluster aligned either.
 *
 * On return, @host_offset is set to the beginning of the requested
 * area. This area is guaranteed to be contiguous on the qcow2 file
 * but it can be smaller than initially requested. In this case @bytes
 * is updated with the actual size.
 *
 * If any clusters or subclusters were allocated then @m contains a
 * list with the information of all the affected regions. Note that
 * this can happen regardless of whether this function succeeds or
 * not. The caller is responsible for updating the L2 metadata of the
 * allocated clusters (on success) or freeing them (on failure), and
 * for clearing the contents of @m afterwards in both cases.
 *
 * If the request conflicts with another write request in flight, the coroutine
 * is queued and will be reentered when the dependency has completed.
 *
 * Return 0 on success and -errno in error cases
 */
int coroutine_fn qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset,
                                         unsigned int *bytes,
                                         uint64_t *host_offset,
                                         QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t start, remaining;
    uint64_t cluster_offset;
    uint64_t cur_bytes;
    int ret;

    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *bytes);

again:
    /* (Re)start the search from scratch */
    start = offset;
    remaining = *bytes;
    cluster_offset = INV_OFFSET;
    *host_offset = INV_OFFSET;
    cur_bytes = 0;
    *m = NULL;

    while (true) {

        /* Remember the host offset of the first area that was found */
        if (*host_offset == INV_OFFSET && cluster_offset != INV_OFFSET) {
            *host_offset = cluster_offset;
        }

        assert(remaining >= cur_bytes);

        /* Advance past the area handled in the previous iteration */
        start           += cur_bytes;
        remaining       -= cur_bytes;

        if (cluster_offset != INV_OFFSET) {
            cluster_offset += cur_bytes;
        }

        if (remaining == 0) {
            break;
        }

        cur_bytes = remaining;

        /*
         * Now start gathering as many contiguous clusters as possible:
         *
         * 1. Check for overlaps with in-flight allocations
         *
         *      a) Overlap not in the first cluster -> shorten this request and
         *         let the caller handle the rest in its next loop iteration.
         *
         *      b) Real overlaps of two requests. Yield and restart the search
         *         for contiguous clusters (the situation could have changed
         *         while we were sleeping)
         *
         *      c) TODO: Request starts in the same cluster as the in-flight
         *         allocation ends. Shorten the COW of the in-flight allocation,
         *         set cluster_offset to write to the same cluster and set up
         *         the right synchronisation between the in-flight request and
         *         the new one.
         */
        ret = handle_dependencies(bs, start, &cur_bytes, m);
        if (ret == -EAGAIN) {
            /* Currently handle_dependencies() doesn't yield if we already had
             * an allocation. If it did, we would have to clean up the L2Meta
             * structs before starting over. */
            assert(*m == NULL);
            goto again;
        } else if (ret < 0) {
            return ret;
        } else if (cur_bytes == 0) {
            break;
        } else {
            /* handle_dependencies() may have decreased cur_bytes (shortened
             * the allocations below) so that the next dependency is processed
             * correctly during the next loop iteration. */
        }

        /*
         * 2. Count contiguous COPIED clusters.
         */
        ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            continue;
        } else if (cur_bytes == 0) {
            break;
        }

        /*
         * 3. If the request still hasn't completed, allocate new clusters,
         *    considering any cluster_offset of steps 1c or 2.
         */
        ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            continue;
        } else {
            assert(cur_bytes == 0);
            break;
        }
    }

    /* Report how much of the request is covered by the returned area */
    *bytes -= remaining;
    assert(*bytes > 0);
    assert(*host_offset != INV_OFFSET);
    assert(offset_into_cluster(s, *host_offset) ==
           offset_into_cluster(s, offset));

    return 0;
}
1897 */ 1898 static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, 1899 uint64_t nb_clusters, 1900 enum qcow2_discard_type type, bool full_discard) 1901 { 1902 BDRVQcow2State *s = bs->opaque; 1903 uint64_t *l2_slice; 1904 int l2_index; 1905 int ret; 1906 int i; 1907 1908 ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); 1909 if (ret < 0) { 1910 return ret; 1911 } 1912 1913 /* Limit nb_clusters to one L2 slice */ 1914 nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); 1915 assert(nb_clusters <= INT_MAX); 1916 1917 for (i = 0; i < nb_clusters; i++) { 1918 uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i); 1919 uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); 1920 uint64_t new_l2_entry = old_l2_entry; 1921 uint64_t new_l2_bitmap = old_l2_bitmap; 1922 QCow2ClusterType cluster_type = 1923 qcow2_get_cluster_type(bs, old_l2_entry); 1924 1925 /* 1926 * If full_discard is true, the cluster should not read back as zeroes, 1927 * but rather fall through to the backing file. 1928 * 1929 * If full_discard is false, make sure that a discarded area reads back 1930 * as zeroes for v3 images (we cannot do it for v2 without actually 1931 * writing a zero-filled buffer). We can skip the operation if the 1932 * cluster is already marked as zero, or if it's unallocated and we 1933 * don't have a backing file. 1934 * 1935 * TODO We might want to use bdrv_block_status(bs) here, but we're 1936 * holding s->lock, so that doesn't work today. 1937 */ 1938 if (full_discard) { 1939 new_l2_entry = new_l2_bitmap = 0; 1940 } else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) { 1941 if (has_subclusters(s)) { 1942 new_l2_entry = 0; 1943 new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES; 1944 } else { 1945 new_l2_entry = s->qcow_version >= 3 ? 
QCOW_OFLAG_ZERO : 0; 1946 } 1947 } 1948 1949 if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) { 1950 continue; 1951 } 1952 1953 /* First remove L2 entries */ 1954 qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 1955 set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry); 1956 if (has_subclusters(s)) { 1957 set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap); 1958 } 1959 /* Then decrease the refcount */ 1960 qcow2_free_any_cluster(bs, old_l2_entry, type); 1961 } 1962 1963 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 1964 1965 return nb_clusters; 1966 } 1967 1968 int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, 1969 uint64_t bytes, enum qcow2_discard_type type, 1970 bool full_discard) 1971 { 1972 BDRVQcow2State *s = bs->opaque; 1973 uint64_t end_offset = offset + bytes; 1974 uint64_t nb_clusters; 1975 int64_t cleared; 1976 int ret; 1977 1978 /* Caller must pass aligned values, except at image end */ 1979 assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); 1980 assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) || 1981 end_offset == bs->total_sectors << BDRV_SECTOR_BITS); 1982 1983 nb_clusters = size_to_clusters(s, bytes); 1984 1985 s->cache_discards = true; 1986 1987 /* Each L2 slice is handled by its own loop iteration */ 1988 while (nb_clusters > 0) { 1989 cleared = discard_in_l2_slice(bs, offset, nb_clusters, type, 1990 full_discard); 1991 if (cleared < 0) { 1992 ret = cleared; 1993 goto fail; 1994 } 1995 1996 nb_clusters -= cleared; 1997 offset += (cleared * s->cluster_size); 1998 } 1999 2000 ret = 0; 2001 fail: 2002 s->cache_discards = false; 2003 qcow2_process_discards(bs, ret); 2004 2005 return ret; 2006 } 2007 2008 /* 2009 * This zeroes as many clusters of nb_clusters as possible at once (i.e. 2010 * all clusters in the same L2 slice) and returns the number of zeroed 2011 * clusters. 
2012 */ 2013 static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset, 2014 uint64_t nb_clusters, int flags) 2015 { 2016 BDRVQcow2State *s = bs->opaque; 2017 uint64_t *l2_slice; 2018 int l2_index; 2019 int ret; 2020 int i; 2021 2022 ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); 2023 if (ret < 0) { 2024 return ret; 2025 } 2026 2027 /* Limit nb_clusters to one L2 slice */ 2028 nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); 2029 assert(nb_clusters <= INT_MAX); 2030 2031 for (i = 0; i < nb_clusters; i++) { 2032 uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i); 2033 uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i); 2034 QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry); 2035 bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) || 2036 ((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type)); 2037 uint64_t new_l2_entry = unmap ? 0 : old_l2_entry; 2038 uint64_t new_l2_bitmap = old_l2_bitmap; 2039 2040 if (has_subclusters(s)) { 2041 new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES; 2042 } else { 2043 new_l2_entry |= QCOW_OFLAG_ZERO; 2044 } 2045 2046 if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) { 2047 continue; 2048 } 2049 2050 /* First update L2 entries */ 2051 qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 2052 set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry); 2053 if (has_subclusters(s)) { 2054 set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap); 2055 } 2056 2057 /* Then decrease the refcount */ 2058 if (unmap) { 2059 qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST); 2060 } 2061 } 2062 2063 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 2064 2065 return nb_clusters; 2066 } 2067 2068 static int zero_l2_subclusters(BlockDriverState *bs, uint64_t offset, 2069 unsigned nb_subclusters) 2070 { 2071 BDRVQcow2State *s = bs->opaque; 2072 uint64_t *l2_slice; 2073 uint64_t old_l2_bitmap, l2_bitmap; 2074 int l2_index, ret, sc = 
offset_to_sc_index(s, offset); 2075 2076 /* For full clusters use zero_in_l2_slice() instead */ 2077 assert(nb_subclusters > 0 && nb_subclusters < s->subclusters_per_cluster); 2078 assert(sc + nb_subclusters <= s->subclusters_per_cluster); 2079 assert(offset_into_subcluster(s, offset) == 0); 2080 2081 ret = get_cluster_table(bs, offset, &l2_slice, &l2_index); 2082 if (ret < 0) { 2083 return ret; 2084 } 2085 2086 switch (qcow2_get_cluster_type(bs, get_l2_entry(s, l2_slice, l2_index))) { 2087 case QCOW2_CLUSTER_COMPRESSED: 2088 ret = -ENOTSUP; /* We cannot partially zeroize compressed clusters */ 2089 goto out; 2090 case QCOW2_CLUSTER_NORMAL: 2091 case QCOW2_CLUSTER_UNALLOCATED: 2092 break; 2093 default: 2094 g_assert_not_reached(); 2095 } 2096 2097 old_l2_bitmap = l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index); 2098 2099 l2_bitmap |= QCOW_OFLAG_SUB_ZERO_RANGE(sc, sc + nb_subclusters); 2100 l2_bitmap &= ~QCOW_OFLAG_SUB_ALLOC_RANGE(sc, sc + nb_subclusters); 2101 2102 if (old_l2_bitmap != l2_bitmap) { 2103 set_l2_bitmap(s, l2_slice, l2_index, l2_bitmap); 2104 qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice); 2105 } 2106 2107 ret = 0; 2108 out: 2109 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 2110 2111 return ret; 2112 } 2113 2114 int coroutine_fn qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, 2115 uint64_t bytes, int flags) 2116 { 2117 BDRVQcow2State *s = bs->opaque; 2118 uint64_t end_offset = offset + bytes; 2119 uint64_t nb_clusters; 2120 unsigned head, tail; 2121 int64_t cleared; 2122 int ret; 2123 2124 /* If we have to stay in sync with an external data file, zero out 2125 * s->data_file first. 
*/ 2126 if (data_file_is_raw(bs)) { 2127 assert(has_data_file(bs)); 2128 ret = bdrv_co_pwrite_zeroes(s->data_file, offset, bytes, flags); 2129 if (ret < 0) { 2130 return ret; 2131 } 2132 } 2133 2134 /* Caller must pass aligned values, except at image end */ 2135 assert(offset_into_subcluster(s, offset) == 0); 2136 assert(offset_into_subcluster(s, end_offset) == 0 || 2137 end_offset >= bs->total_sectors << BDRV_SECTOR_BITS); 2138 2139 /* 2140 * The zero flag is only supported by version 3 and newer. However, if we 2141 * have no backing file, we can resort to discard in version 2. 2142 */ 2143 if (s->qcow_version < 3) { 2144 if (!bs->backing) { 2145 return qcow2_cluster_discard(bs, offset, bytes, 2146 QCOW2_DISCARD_REQUEST, false); 2147 } 2148 return -ENOTSUP; 2149 } 2150 2151 head = MIN(end_offset, ROUND_UP(offset, s->cluster_size)) - offset; 2152 offset += head; 2153 2154 tail = (end_offset >= bs->total_sectors << BDRV_SECTOR_BITS) ? 0 : 2155 end_offset - MAX(offset, start_of_cluster(s, end_offset)); 2156 end_offset -= tail; 2157 2158 s->cache_discards = true; 2159 2160 if (head) { 2161 ret = zero_l2_subclusters(bs, offset - head, 2162 size_to_subclusters(s, head)); 2163 if (ret < 0) { 2164 goto fail; 2165 } 2166 } 2167 2168 /* Each L2 slice is handled by its own loop iteration */ 2169 nb_clusters = size_to_clusters(s, end_offset - offset); 2170 2171 while (nb_clusters > 0) { 2172 cleared = zero_in_l2_slice(bs, offset, nb_clusters, flags); 2173 if (cleared < 0) { 2174 ret = cleared; 2175 goto fail; 2176 } 2177 2178 nb_clusters -= cleared; 2179 offset += (cleared * s->cluster_size); 2180 } 2181 2182 if (tail) { 2183 ret = zero_l2_subclusters(bs, end_offset, size_to_subclusters(s, tail)); 2184 if (ret < 0) { 2185 goto fail; 2186 } 2187 } 2188 2189 ret = 0; 2190 fail: 2191 s->cache_discards = false; 2192 qcow2_process_discards(bs, ret); 2193 2194 return ret; 2195 } 2196 2197 /* 2198 * Expands all zero clusters in a specific L1 table (or deallocates them, for 2199 * 
non-backed non-pre-allocated zero clusters).
 *
 * l1_entries and *visited_l1_entries are used to keep track of progress for
 * status_cb(). l1_entries contains the total number of L1 entries and
 * *visited_l1_entries counts all visited L1 entries.
 *
 * l1_table is treated as the active L1 table if it is the same pointer as
 * s->l1_table; its L2 slices are then accessed through the L2 table cache.
 * For any other table the L2 slices are read from and written back to
 * bs->file directly through a temporary buffer.
 *
 * Returns 0 on success and a negative errno value on failure (-ENOMEM if the
 * temporary buffer cannot be allocated, -EIO if an unaligned offset is found,
 * or the error reported by one of the called functions).
 */
static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                                      int l1_size, int64_t *visited_l1_entries,
                                      int64_t l1_entries,
                                      BlockDriverAmendStatusCB *status_cb,
                                      void *cb_opaque)
{
    BDRVQcow2State *s = bs->opaque;
    bool is_active_l1 = (l1_table == s->l1_table);
    uint64_t *l2_slice = NULL;
    unsigned slice, slice_size2, n_slices;
    int ret;
    int i, j;

    /* qcow2_downgrade() is not allowed in images with subclusters */
    assert(!has_subclusters(s));

    /* Size of one L2 slice in bytes, and number of slices per cluster */
    slice_size2 = s->l2_slice_size * l2_entry_size(s);
    n_slices = s->cluster_size / slice_size2;

    if (!is_active_l1) {
        /* inactive L2 tables require a buffer to be stored in when loading
         * them from disk */
        l2_slice = qemu_try_blockalign(bs->file->bs, slice_size2);
        if (l2_slice == NULL) {
            return -ENOMEM;
        }
    }

    for (i = 0; i < l1_size; i++) {
        uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
        uint64_t l2_refcount;

        if (!l2_offset) {
            /* unallocated */
            (*visited_l1_entries)++;
            if (status_cb) {
                status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
            }
            continue;
        }

        if (offset_into_cluster(s, l2_offset)) {
            qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
                                    PRIx64 " unaligned (L1 index: %#x)",
                                    l2_offset, i);
            ret = -EIO;
            goto fail;
        }

        /*
         * The refcount of the L2 table decides below which refcount newly
         * allocated data clusters get and whether QCOW_OFLAG_COPIED is set.
         */
        ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
                                 &l2_refcount);
        if (ret < 0) {
            goto fail;
        }

        for (slice = 0; slice < n_slices; slice++) {
            uint64_t slice_offset = l2_offset + slice * slice_size2;
            /* Set once any entry of this slice has been modified */
            bool l2_dirty = false;
            if (is_active_l1) {
                /* get active L2 tables from cache */
                ret = qcow2_cache_get(bs, s->l2_table_cache, slice_offset,
                                      (void **)&l2_slice);
            } else {
                /* load inactive L2 tables from disk */
                ret = bdrv_pread(bs->file, slice_offset, slice_size2,
                                 l2_slice, 0);
            }
            if (ret < 0) {
                goto fail;
            }

            for (j = 0; j < s->l2_slice_size; j++) {
                uint64_t l2_entry = get_l2_entry(s, l2_slice, j);
                int64_t offset = l2_entry & L2E_OFFSET_MASK;
                QCow2ClusterType cluster_type =
                    qcow2_get_cluster_type(bs, l2_entry);

                /* Only the two kinds of zero clusters are of interest */
                if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
                    cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
                    continue;
                }

                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                    if (!bs->backing) {
                        /*
                         * not backed; therefore we can simply deallocate the
                         * cluster. No need to call set_l2_bitmap(), this
                         * function doesn't support images with subclusters.
                         */
                        set_l2_entry(s, l2_slice, j, 0);
                        l2_dirty = true;
                        continue;
                    }

                    /* Backed: a real cluster is needed to hold the zeroes */
                    offset = qcow2_alloc_clusters(bs, s->cluster_size);
                    if (offset < 0) {
                        ret = offset;
                        goto fail;
                    }

                    /* The offset must fit in the offset field */
                    assert((offset & L2E_OFFSET_MASK) == offset);

                    if (l2_refcount > 1) {
                        /* For shared L2 tables, set the refcount accordingly
                         * (it is already 1 and needs to be l2_refcount) */
                        ret = qcow2_update_cluster_refcount(
                            bs, offset >> s->cluster_bits,
                            refcount_diff(1, l2_refcount), false,
                            QCOW2_DISCARD_OTHER);
                        if (ret < 0) {
                            qcow2_free_clusters(bs, offset, s->cluster_size,
                                                QCOW2_DISCARD_OTHER);
                            goto fail;
                        }
                    }
                }

                /*
                 * From here on, "offset" is either the cluster allocated just
                 * above (ZERO_PLAIN) or the offset already stored in the L2
                 * entry (ZERO_ALLOC). On every error path below only the
                 * former is freed again.
                 */
                if (offset_into_cluster(s, offset)) {
                    int l2_index = slice * s->l2_slice_size + j;
                    qcow2_signal_corruption(
                        bs, true, -1, -1,
                        "Cluster allocation offset "
                        "%#" PRIx64 " unaligned (L2 offset: %#"
                        PRIx64 ", L2 index: %#x)", offset,
                        l2_offset, l2_index);
                    if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                        qcow2_free_clusters(bs, offset, s->cluster_size,
                                            QCOW2_DISCARD_ALWAYS);
                    }
                    ret = -EIO;
                    goto fail;
                }

                ret = qcow2_pre_write_overlap_check(bs, 0, offset,
                                                    s->cluster_size, true);
                if (ret < 0) {
                    if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                        qcow2_free_clusters(bs, offset, s->cluster_size,
                                            QCOW2_DISCARD_ALWAYS);
                    }
                    goto fail;
                }

                /* Write the zeroes the metadata flag used to stand for */
                ret = bdrv_pwrite_zeroes(s->data_file, offset,
                                         s->cluster_size, 0);
                if (ret < 0) {
                    if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                        qcow2_free_clusters(bs, offset, s->cluster_size,
                                            QCOW2_DISCARD_ALWAYS);
                    }
                    goto fail;
                }

                /* QCOW_OFLAG_COPIED is only set for an unshared L2 table */
                if (l2_refcount == 1) {
                    set_l2_entry(s, l2_slice, j, offset | QCOW_OFLAG_COPIED);
                } else {
                    set_l2_entry(s, l2_slice, j, offset);
                }
                /*
                 * No need to call set_l2_bitmap() after set_l2_entry() because
                 * this function doesn't support images with subclusters.
                 */
                l2_dirty = true;
            }

            if (is_active_l1) {
                /* Active slice: mark it dirty if needed and release it */
                if (l2_dirty) {
                    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
                    qcow2_cache_depends_on_flush(s->l2_table_cache);
                }
                qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
            } else {
                /* Inactive slice: write the modified buffer back to disk */
                if (l2_dirty) {
                    ret = qcow2_pre_write_overlap_check(
                        bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2,
                        slice_offset, slice_size2, false);
                    if (ret < 0) {
                        goto fail;
                    }

                    ret = bdrv_pwrite(bs->file, slice_offset, slice_size2,
                                      l2_slice, 0);
                    if (ret < 0) {
                        goto fail;
                    }
                }
            }
        }

        (*visited_l1_entries)++;
        if (status_cb) {
            status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
        }
    }

    ret = 0;

fail:
    /*
     * Reached on success as well. The temporary buffer of an inactive table
     * is freed here; for the active table a slice that is still referenced
     * is handed back to the cache.
     * NOTE(review): this presumably relies on qcow2_cache_put() resetting
     * l2_slice to NULL after the in-loop release - confirm.
     */
    if (l2_slice) {
        if (!is_active_l1) {
            qemu_vfree(l2_slice);
        } else {
            qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
        }
    }
    return ret;
}

/*
 * For backed images, expands all zero clusters on the image. For non-backed
 * images, deallocates all non-pre-allocated zero clusters (and claims the
 * allocation for pre-allocated ones). This is important for downgrading to a
 * qcow2 version which doesn't yet support metadata zero clusters.
2419 */ 2420 int qcow2_expand_zero_clusters(BlockDriverState *bs, 2421 BlockDriverAmendStatusCB *status_cb, 2422 void *cb_opaque) 2423 { 2424 BDRVQcow2State *s = bs->opaque; 2425 uint64_t *l1_table = NULL; 2426 int64_t l1_entries = 0, visited_l1_entries = 0; 2427 int ret; 2428 int i, j; 2429 2430 if (status_cb) { 2431 l1_entries = s->l1_size; 2432 for (i = 0; i < s->nb_snapshots; i++) { 2433 l1_entries += s->snapshots[i].l1_size; 2434 } 2435 } 2436 2437 ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size, 2438 &visited_l1_entries, l1_entries, 2439 status_cb, cb_opaque); 2440 if (ret < 0) { 2441 goto fail; 2442 } 2443 2444 /* Inactive L1 tables may point to active L2 tables - therefore it is 2445 * necessary to flush the L2 table cache before trying to access the L2 2446 * tables pointed to by inactive L1 entries (else we might try to expand 2447 * zero clusters that have already been expanded); furthermore, it is also 2448 * necessary to empty the L2 table cache, since it may contain tables which 2449 * are now going to be modified directly on disk, bypassing the cache. 2450 * qcow2_cache_empty() does both for us. 
*/ 2451 ret = qcow2_cache_empty(bs, s->l2_table_cache); 2452 if (ret < 0) { 2453 goto fail; 2454 } 2455 2456 for (i = 0; i < s->nb_snapshots; i++) { 2457 int l1_size2; 2458 uint64_t *new_l1_table; 2459 Error *local_err = NULL; 2460 2461 ret = qcow2_validate_table(bs, s->snapshots[i].l1_table_offset, 2462 s->snapshots[i].l1_size, L1E_SIZE, 2463 QCOW_MAX_L1_SIZE, "Snapshot L1 table", 2464 &local_err); 2465 if (ret < 0) { 2466 error_report_err(local_err); 2467 goto fail; 2468 } 2469 2470 l1_size2 = s->snapshots[i].l1_size * L1E_SIZE; 2471 new_l1_table = g_try_realloc(l1_table, l1_size2); 2472 2473 if (!new_l1_table) { 2474 ret = -ENOMEM; 2475 goto fail; 2476 } 2477 2478 l1_table = new_l1_table; 2479 2480 ret = bdrv_pread(bs->file, s->snapshots[i].l1_table_offset, l1_size2, 2481 l1_table, 0); 2482 if (ret < 0) { 2483 goto fail; 2484 } 2485 2486 for (j = 0; j < s->snapshots[i].l1_size; j++) { 2487 be64_to_cpus(&l1_table[j]); 2488 } 2489 2490 ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size, 2491 &visited_l1_entries, l1_entries, 2492 status_cb, cb_opaque); 2493 if (ret < 0) { 2494 goto fail; 2495 } 2496 } 2497 2498 ret = 0; 2499 2500 fail: 2501 g_free(l1_table); 2502 return ret; 2503 } 2504 2505 void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry, 2506 uint64_t *coffset, int *csize) 2507 { 2508 BDRVQcow2State *s = bs->opaque; 2509 int nb_csectors; 2510 2511 assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED); 2512 2513 *coffset = l2_entry & s->cluster_offset_mask; 2514 2515 nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1; 2516 *csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE - 2517 (*coffset & (QCOW2_COMPRESSED_SECTOR_SIZE - 1)); 2518 } 2519