/*
 * Block driver for the QCOW version 2 format
 *
 * Copyright (c) 2004-2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include <zlib.h>

#include "block/block-io.h"
#include "qapi/error.h"
#include "qcow2.h"
#include "qemu/bswap.h"
#include "qemu/memalign.h"
#include "trace.h"

int coroutine_fn qcow2_shrink_l1_table(BlockDriverState *bs,
                                       uint64_t exact_size)
{
    BDRVQcow2State *s = bs->opaque;
    int new_l1_size, i, ret;

    if (exact_size >= s->l1_size) {
        return 0;
    }

    new_l1_size = exact_size;

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size);
#endif

    BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE);
    ret = bdrv_co_pwrite_zeroes(bs->file,
                                s->l1_table_offset + new_l1_size * L1E_SIZE,
                                (s->l1_size - new_l1_size) * L1E_SIZE, 0);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_co_flush(bs->file->bs);
    if (ret < 0) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS);
    for (i = s->l1_size - 1; i > new_l1_size - 1; i--) {
        if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) {
            continue;
        }
        qcow2_free_clusters(bs, s->l1_table[i] & L1E_OFFSET_MASK,
                            s->cluster_size, QCOW2_DISCARD_ALWAYS);
        s->l1_table[i] = 0;
    }
    return 0;

fail:
    /*
     * If the write to the l1_table failed, the image may contain a partially
     * overwritten l1_table. In this case it would be better to clear the
     * l1_table in memory to avoid possible image corruption.
     */
    memset(s->l1_table + new_l1_size, 0,
           (s->l1_size - new_l1_size) * L1E_SIZE);
    return ret;
}

int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                        bool exact_size)
{
    BDRVQcow2State *s = bs->opaque;
    int new_l1_size2, ret, i;
    uint64_t *new_l1_table;
    int64_t old_l1_table_offset, old_l1_size;
    int64_t new_l1_table_offset, new_l1_size;
    uint8_t data[12];

    if (min_size <= s->l1_size)
        return 0;

    /* Do a sanity check on min_size before trying to calculate new_l1_size
     * (this prevents overflows during the while loop for the calculation of
     * new_l1_size) */
    if (min_size > INT_MAX / L1E_SIZE) {
        return -EFBIG;
    }

    if (exact_size) {
        new_l1_size = min_size;
    } else {
        /* Bump size up to reduce the number of times we have to grow */
        new_l1_size = s->l1_size;
        if (new_l1_size == 0) {
            new_l1_size = 1;
        }
        while (min_size > new_l1_size) {
            new_l1_size = DIV_ROUND_UP(new_l1_size * 3, 2);
        }
    }

    QEMU_BUILD_BUG_ON(QCOW_MAX_L1_SIZE > INT_MAX);
    if (new_l1_size > QCOW_MAX_L1_SIZE / L1E_SIZE) {
        return -EFBIG;
    }

#ifdef DEBUG_ALLOC2
    fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
            s->l1_size, new_l1_size);
#endif

    new_l1_size2 = L1E_SIZE * new_l1_size;
    new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_size2);
    if (new_l1_table == NULL) {
        return -ENOMEM;
    }
    memset(new_l1_table, 0, new_l1_size2);

    if (s->l1_size) {
        memcpy(new_l1_table, s->l1_table, s->l1_size * L1E_SIZE);
    }

    /* write new table (align to cluster) */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
    new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
    if (new_l1_table_offset < 0) {
        qemu_vfree(new_l1_table);
        return new_l1_table_offset;
    }

    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* the L1 position has not yet been updated, so these clusters must
     * indeed be completely free */
    ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset,
                                        new_l1_size2, false);
    if (ret < 0) {
        goto fail;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
    for(i = 0; i < s->l1_size; i++)
        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_size2,
                           new_l1_table, 0);
    if (ret < 0)
        goto fail;
    for(i = 0; i < s->l1_size; i++)
        new_l1_table[i] = be64_to_cpu(new_l1_table[i]);

    /* set new table */
    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
    stl_be_p(data, new_l1_size);
    stq_be_p(data + 4, new_l1_table_offset);
    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size),
                           sizeof(data), data, 0);
    if (ret < 0) {
        goto fail;
    }
    qemu_vfree(s->l1_table);
    old_l1_table_offset = s->l1_table_offset;
    s->l1_table_offset = new_l1_table_offset;
    s->l1_table = new_l1_table;
    old_l1_size = s->l1_size;
    s->l1_size = new_l1_size;
    qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * L1E_SIZE,
                        QCOW2_DISCARD_OTHER);
    return 0;
fail:
    qemu_vfree(new_l1_table);
    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
                        QCOW2_DISCARD_OTHER);
    return ret;
}

/*
 * l2_load
 *
 * @bs: The BlockDriverState
 * @offset: A guest offset, used to calculate what slice of the L2
 *          table to load.
 * @l2_offset: Offset to the L2 table in the image file.
 * @l2_slice: Location to store the pointer to the L2 slice.
 *
 * Loads an L2 slice into memory (L2 slices are the parts of L2 tables
 * that are loaded by the qcow2 cache). If the slice is in the cache,
 * the cache is used; otherwise the L2 slice is loaded from the image
 * file.
 */
static int l2_load(BlockDriverState *bs, uint64_t offset,
                   uint64_t l2_offset, uint64_t **l2_slice)
{
    BDRVQcow2State *s = bs->opaque;
    int start_of_slice = l2_entry_size(s) *
        (offset_to_l2_index(s, offset) - offset_to_l2_slice_index(s, offset));

    return qcow2_cache_get(bs, s->l2_table_cache, l2_offset + start_of_slice,
                           (void **)l2_slice);
}

/*
 * Writes an L1 entry to disk (note that depending on the alignment
 * requirements this function may write more than just one entry in
 * order to prevent bdrv_pwrite from performing a read-modify-write)
 */
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
{
    BDRVQcow2State *s = bs->opaque;
    int l1_start_index;
    int i, ret;
    int bufsize = MAX(L1E_SIZE,
                      MIN(bs->file->bs->bl.request_alignment, s->cluster_size));
    int nentries = bufsize / L1E_SIZE;
    g_autofree uint64_t *buf = g_try_new0(uint64_t, nentries);

    if (buf == NULL) {
        return -ENOMEM;
    }

    l1_start_index = QEMU_ALIGN_DOWN(l1_index, nentries);
    for (i = 0; i < MIN(nentries, s->l1_size - l1_start_index); i++) {
        buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
    }

    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
            s->l1_table_offset + L1E_SIZE * l1_start_index, bufsize, false);
    if (ret < 0) {
        return ret;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
    ret = bdrv_pwrite_sync(bs->file,
                           s->l1_table_offset + L1E_SIZE * l1_start_index,
                           bufsize, buf, 0);
    if (ret < 0) {
        return ret;
    }

    return 0;
}

/*
 * l2_allocate
 *
 * Allocate a new L2 table in the file. If l1_index points to an already
 * used entry in the L1 table (i.e. we are doing a copy on write for the L2
 * table), copy the contents of the old L2 table into the newly allocated one.
 * Otherwise the new table is initialized with zeros.
 *
 */

static int l2_allocate(BlockDriverState *bs, int l1_index)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t old_l2_offset;
    uint64_t *l2_slice = NULL;
    unsigned slice, slice_size2, n_slices;
    int64_t l2_offset;
    int ret;

    old_l2_offset = s->l1_table[l1_index];

    trace_qcow2_l2_allocate(bs, l1_index);

    /* allocate a new l2 entry */

    l2_offset = qcow2_alloc_clusters(bs, s->l2_size * l2_entry_size(s));
    if (l2_offset < 0) {
        ret = l2_offset;
        goto fail;
    }

    /* The offset must fit in the offset field of the L1 table entry */
    assert((l2_offset & L1E_OFFSET_MASK) == l2_offset);

    /* If we're allocating the table at offset 0 then something is wrong */
    if (l2_offset == 0) {
        qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
                                "allocation of L2 table at offset 0");
        ret = -EIO;
        goto fail;
    }

    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        goto fail;
    }

    /* allocate a new entry in the l2 cache */

    slice_size2 = s->l2_slice_size * l2_entry_size(s);
    n_slices = s->cluster_size / slice_size2;

    trace_qcow2_l2_allocate_get_empty(bs, l1_index);
    for (slice = 0; slice < n_slices; slice++) {
        ret = qcow2_cache_get_empty(bs, s->l2_table_cache,
                                    l2_offset + slice * slice_size2,
                                    (void **) &l2_slice);
        if (ret < 0) {
            goto fail;
        }

        if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
            /* if there was no old l2 table, clear the new slice */
            memset(l2_slice, 0, slice_size2);
        } else {
            uint64_t *old_slice;
            uint64_t old_l2_slice_offset =
                (old_l2_offset & L1E_OFFSET_MASK) + slice * slice_size2;

            /* if there was an old l2 table, read a slice from the disk */
            BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
            ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_slice_offset,
                                  (void **) &old_slice);
            if (ret < 0) {
                goto fail;
            }

            memcpy(l2_slice, old_slice, slice_size2);

            qcow2_cache_put(s->l2_table_cache, (void **) &old_slice);
        }

        /* write the l2 slice to the file */
        BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);

        trace_qcow2_l2_allocate_write_l2(bs, l1_index);
        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
    }

    ret = qcow2_cache_flush(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    /* update the L1 entry */
    trace_qcow2_l2_allocate_write_l1(bs, l1_index);
    s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
    ret = qcow2_write_l1_entry(bs, l1_index);
    if (ret < 0) {
        goto fail;
    }

    trace_qcow2_l2_allocate_done(bs, l1_index, 0);
    return 0;

fail:
    trace_qcow2_l2_allocate_done(bs, l1_index, ret);
    if (l2_slice != NULL) {
        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
    }
    s->l1_table[l1_index] = old_l2_offset;
    if (l2_offset > 0) {
        qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s),
                            QCOW2_DISCARD_ALWAYS);
    }
    return ret;
}

/*
 * For a given L2 entry, count the number of contiguous subclusters of
 * the same type starting from @sc_from. Compressed clusters are
 * treated as if they were divided into subclusters of size
 * s->subcluster_size.
 *
 * Return the number of contiguous subclusters and set @type to the
 * subcluster type.
 *
 * If the L2 entry is invalid return -errno and set @type to
 * QCOW2_SUBCLUSTER_INVALID.
 */
static int qcow2_get_subcluster_range_type(BlockDriverState *bs,
                                           uint64_t l2_entry,
                                           uint64_t l2_bitmap,
                                           unsigned sc_from,
                                           QCow2SubclusterType *type)
{
    BDRVQcow2State *s = bs->opaque;
    uint32_t val;

    *type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_from);

    if (*type == QCOW2_SUBCLUSTER_INVALID) {
        return -EINVAL;
    } else if (!has_subclusters(s) || *type == QCOW2_SUBCLUSTER_COMPRESSED) {
        return s->subclusters_per_cluster - sc_from;
    }

    switch (*type) {
    case QCOW2_SUBCLUSTER_NORMAL:
        val = l2_bitmap | QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from);
        return cto32(val) - sc_from;

    case QCOW2_SUBCLUSTER_ZERO_PLAIN:
    case QCOW2_SUBCLUSTER_ZERO_ALLOC:
        val = (l2_bitmap | QCOW_OFLAG_SUB_ZERO_RANGE(0, sc_from)) >> 32;
        return cto32(val) - sc_from;

    case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
    case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
        val = ((l2_bitmap >> 32) | l2_bitmap)
            & ~QCOW_OFLAG_SUB_ALLOC_RANGE(0, sc_from);
        return ctz32(val) - sc_from;

    default:
        g_assert_not_reached();
    }
}

/*
 * Return the number of contiguous subclusters of the exact same type
 * in a given L2 slice, starting from cluster @l2_index, subcluster
 * @sc_index. Allocated subclusters are required to be contiguous in
 * the image file.
 * At most @nb_clusters are checked (note that this means clusters,
 * not subclusters).
 * Compressed clusters are always processed one by one but for the
 * purpose of this count they are treated as if they were divided into
 * subclusters of size s->subcluster_size.
 * On failure return -errno and update @l2_index to point to the
 * invalid entry.
 */
static int count_contiguous_subclusters(BlockDriverState *bs, int nb_clusters,
                                        unsigned sc_index, uint64_t *l2_slice,
                                        unsigned *l2_index)
{
    BDRVQcow2State *s = bs->opaque;
    int i, count = 0;
    bool check_offset = false;
    uint64_t expected_offset = 0;
    QCow2SubclusterType expected_type = QCOW2_SUBCLUSTER_NORMAL, type;

    assert(*l2_index + nb_clusters <= s->l2_slice_size);

    for (i = 0; i < nb_clusters; i++) {
        unsigned first_sc = (i == 0) ? sc_index : 0;
        uint64_t l2_entry = get_l2_entry(s, l2_slice, *l2_index + i);
        uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, *l2_index + i);
        int ret = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap,
                                                  first_sc, &type);
        if (ret < 0) {
            *l2_index += i; /* Point to the invalid entry */
            return -EIO;
        }
        if (i == 0) {
            if (type == QCOW2_SUBCLUSTER_COMPRESSED) {
                /* Compressed clusters are always processed one by one */
                return ret;
            }
            expected_type = type;
            expected_offset = l2_entry & L2E_OFFSET_MASK;
            check_offset = (type == QCOW2_SUBCLUSTER_NORMAL ||
                            type == QCOW2_SUBCLUSTER_ZERO_ALLOC ||
                            type == QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC);
        } else if (type != expected_type) {
            break;
        } else if (check_offset) {
            expected_offset += s->cluster_size;
            if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) {
                break;
            }
        }
        count += ret;
        /* Stop if there are type changes before the end of the cluster */
        if (first_sc + ret < s->subclusters_per_cluster) {
            break;
        }
    }

    return count;
}

static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
                                            uint64_t src_cluster_offset,
                                            unsigned offset_in_cluster,
                                            QEMUIOVector *qiov)
{
    int ret;

    if (qiov->size == 0) {
        return 0;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);

    if (!bs->drv) {
        return -ENOMEDIUM;
    }

    /*
     * We never deal with requests that don't satisfy
     * bdrv_check_qiov_request(), and aligning requests to clusters never
     * breaks this condition. So, do some assertions before calling
     * bs->drv->bdrv_co_preadv_part() which has int64_t arguments.
     */
    assert(src_cluster_offset <= INT64_MAX);
    assert(src_cluster_offset + offset_in_cluster <= INT64_MAX);
    /* Cast qiov->size to uint64_t to silence a compiler warning on -m32 */
    assert((uint64_t)qiov->size <= INT64_MAX);
    bdrv_check_qiov_request(src_cluster_offset + offset_in_cluster, qiov->size,
                            qiov, 0, &error_abort);
    /*
     * Call .bdrv_co_preadv_part() directly instead of using the public
     * block-layer interface. This avoids double I/O throttling and request
     * tracking, which can lead to deadlock when block layer copy-on-read is
     * enabled.
     */
    ret = bs->drv->bdrv_co_preadv_part(bs,
                                       src_cluster_offset + offset_in_cluster,
                                       qiov->size, qiov, 0, 0);
    if (ret < 0) {
        return ret;
    }

    return 0;
}

static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
                                             uint64_t cluster_offset,
                                             unsigned offset_in_cluster,
                                             QEMUIOVector *qiov)
{
    BDRVQcow2State *s = bs->opaque;
    int ret;

    if (qiov->size == 0) {
        return 0;
    }

    ret = qcow2_pre_write_overlap_check(bs, 0,
            cluster_offset + offset_in_cluster, qiov->size, true);
    if (ret < 0) {
        return ret;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
    ret = bdrv_co_pwritev(s->data_file, cluster_offset + offset_in_cluster,
                          qiov->size, qiov, 0);
    if (ret < 0) {
        return ret;
    }

    return 0;
}


/*
 * get_host_offset
 *
 * For a given offset of the virtual disk find the equivalent host
 * offset in the qcow2 file and store it in *host_offset. Neither
 * offset needs to be aligned to a cluster boundary.
 *
 * If the cluster is unallocated then *host_offset will be 0.
 * If the cluster is compressed then *host_offset will contain the l2 entry.
 *
 * On entry, *bytes is the maximum number of contiguous bytes starting at
 * offset that we are interested in.
 *
 * On exit, *bytes is the number of bytes starting at offset that have the same
 * subcluster type and (if applicable) are stored contiguously in the image
 * file. The subcluster type is stored in *subcluster_type.
 * Compressed clusters are always processed one by one.
 *
 * Returns 0 on success, -errno in error cases.
 */
int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset,
                          unsigned int *bytes, uint64_t *host_offset,
                          QCow2SubclusterType *subcluster_type)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int l2_index, sc_index;
    uint64_t l1_index, l2_offset, *l2_slice, l2_entry, l2_bitmap;
    int sc;
    unsigned int offset_in_cluster;
    uint64_t bytes_available, bytes_needed, nb_clusters;
    QCow2SubclusterType type;
    int ret;

    offset_in_cluster = offset_into_cluster(s, offset);
    bytes_needed = (uint64_t) *bytes + offset_in_cluster;

    /* compute how many bytes there are between the start of the cluster
     * containing offset and the end of the l2 slice that contains
     * the entry pointing to it */
    bytes_available =
        ((uint64_t) (s->l2_slice_size - offset_to_l2_slice_index(s, offset)))
        << s->cluster_bits;

    if (bytes_needed > bytes_available) {
        bytes_needed = bytes_available;
    }

    *host_offset = 0;

    /* seek to the l2 offset in the l1 table */

    l1_index = offset_to_l1_index(s, offset);
    if (l1_index >= s->l1_size) {
        type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN;
        goto out;
    }

    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (!l2_offset) {
        type = QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN;
        goto out;
    }

    if (offset_into_cluster(s, l2_offset)) {
        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
                                " unaligned (L1 index: %#" PRIx64 ")",
                                l2_offset, l1_index);
        return -EIO;
    }

    /* load the l2 slice in memory */

    ret = l2_load(bs, offset, l2_offset, &l2_slice);
    if (ret < 0) {
        return ret;
    }

    /* find the cluster offset for the given disk offset */

    l2_index = offset_to_l2_slice_index(s, offset);
    sc_index = offset_to_sc_index(s, offset);
    l2_entry = get_l2_entry(s, l2_slice, l2_index);
    l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);

    nb_clusters = size_to_clusters(s, bytes_needed);
    /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned
     * integers; the minimum cluster size is 512, so this assertion is always
     * true */
    assert(nb_clusters <= INT_MAX);

    type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);
    if (s->qcow_version < 3 && (type == QCOW2_SUBCLUSTER_ZERO_PLAIN ||
                                type == QCOW2_SUBCLUSTER_ZERO_ALLOC)) {
        qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
                                " in pre-v3 image (L2 offset: %#" PRIx64
                                ", L2 index: %#x)", l2_offset, l2_index);
        ret = -EIO;
        goto fail;
    }
    switch (type) {
    case QCOW2_SUBCLUSTER_INVALID:
        break; /* This is handled by count_contiguous_subclusters() below */
    case QCOW2_SUBCLUSTER_COMPRESSED:
        if (has_data_file(bs)) {
            qcow2_signal_corruption(bs, true, -1, -1, "Compressed cluster "
                                    "entry found in image with external data "
                                    "file (L2 offset: %#" PRIx64 ", L2 index: "
                                    "%#x)", l2_offset, l2_index);
            ret = -EIO;
            goto fail;
        }
        *host_offset = l2_entry;
        break;
    case QCOW2_SUBCLUSTER_ZERO_PLAIN:
    case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
        break;
    case QCOW2_SUBCLUSTER_ZERO_ALLOC:
    case QCOW2_SUBCLUSTER_NORMAL:
    case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC: {
        uint64_t host_cluster_offset = l2_entry & L2E_OFFSET_MASK;
        *host_offset = host_cluster_offset + offset_in_cluster;
        if (offset_into_cluster(s, host_cluster_offset)) {
            qcow2_signal_corruption(bs, true, -1, -1,
                                    "Cluster allocation offset %#"
                                    PRIx64 " unaligned (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)", host_cluster_offset,
                                    l2_offset, l2_index);
            ret = -EIO;
            goto fail;
        }
        if (has_data_file(bs) && *host_offset != offset) {
            qcow2_signal_corruption(bs, true, -1, -1,
                                    "External data file host cluster offset %#"
                                    PRIx64 " does not match guest cluster "
                                    "offset: %#" PRIx64
                                    ", L2 index: %#x)", host_cluster_offset,
                                    offset - offset_in_cluster, l2_index);
            ret = -EIO;
            goto fail;
        }
        break;
    }
    default:
        abort();
    }

    sc = count_contiguous_subclusters(bs, nb_clusters, sc_index,
                                      l2_slice, &l2_index);
    if (sc < 0) {
        qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster entry found "
                                " (L2 offset: %#" PRIx64 ", L2 index: %#x)",
                                l2_offset, l2_index);
        ret = -EIO;
        goto fail;
    }
    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    bytes_available = ((int64_t)sc + sc_index) << s->subcluster_bits;

out:
    if (bytes_available > bytes_needed) {
        bytes_available = bytes_needed;
    }

    /* bytes_available <= bytes_needed <= *bytes + offset_in_cluster;
     * subtracting offset_in_cluster will therefore definitely yield something
     * not exceeding UINT_MAX */
    assert(bytes_available - offset_in_cluster <= UINT_MAX);
    *bytes = bytes_available - offset_in_cluster;

    *subcluster_type = type;

    return 0;

fail:
    qcow2_cache_put(s->l2_table_cache, (void **)&l2_slice);
    return ret;
}

/*
 * get_cluster_table
 *
 * for a given disk offset, load (and allocate if needed)
 * the appropriate slice of its l2 table.
 *
 * the cluster index in the l2 slice is given to the caller.
 *
 * Returns 0 on success, -errno in failure case
 */
static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
                             uint64_t **new_l2_slice,
                             int *new_l2_index)
{
    BDRVQcow2State *s = bs->opaque;
    unsigned int l2_index;
    uint64_t l1_index, l2_offset;
    uint64_t *l2_slice = NULL;
    int ret;

    /* seek to the l2 offset in the l1 table */

    l1_index = offset_to_l1_index(s, offset);
    if (l1_index >= s->l1_size) {
        ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
        if (ret < 0) {
            return ret;
        }
    }

    assert(l1_index < s->l1_size);
    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
    if (offset_into_cluster(s, l2_offset)) {
        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
                                " unaligned (L1 index: %#" PRIx64 ")",
                                l2_offset, l1_index);
        return -EIO;
    }

    if (!(s->l1_table[l1_index] & QCOW_OFLAG_COPIED)) {
        /* First allocate a new L2 table (and do COW if needed) */
        ret = l2_allocate(bs, l1_index);
        if (ret < 0) {
            return ret;
        }

        /* Then decrease the refcount of the old table */
        if (l2_offset) {
            qcow2_free_clusters(bs, l2_offset, s->l2_size * l2_entry_size(s),
                                QCOW2_DISCARD_OTHER);
        }

        /* Get the offset of the newly-allocated l2 table */
        l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
        assert(offset_into_cluster(s, l2_offset) == 0);
    }

    /* load the l2 slice in memory */
    ret = l2_load(bs, offset, l2_offset, &l2_slice);
    if (ret < 0) {
        return ret;
    }

    /* find the cluster offset for the given disk offset */

    l2_index = offset_to_l2_slice_index(s, offset);

    *new_l2_slice = l2_slice;
    *new_l2_index = l2_index;

    return 0;
}

/*
 * alloc_compressed_cluster_offset
 *
 * For a given offset on the virtual disk, allocate a new compressed cluster
 * and put the host offset of the cluster into *host_offset. If a cluster is
 * already allocated at the offset, return an error.
 *
 * Return 0 on success and -errno in error cases
 */
int coroutine_fn qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                                       uint64_t offset,
                                                       int compressed_size,
                                                       uint64_t *host_offset)
{
    BDRVQcow2State *s = bs->opaque;
    int l2_index, ret;
    uint64_t *l2_slice;
    int64_t cluster_offset;
    int nb_csectors;

    if (has_data_file(bs)) {
        return 0;
    }

    ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
    if (ret < 0) {
        return ret;
    }

    /* Compression can't overwrite anything. Fail if the cluster was already
     * allocated. */
    cluster_offset = get_l2_entry(s, l2_slice, l2_index);
    if (cluster_offset & L2E_OFFSET_MASK) {
        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
        return -EIO;
    }

    cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
    if (cluster_offset < 0) {
        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
        return cluster_offset;
    }

    nb_csectors =
        (cluster_offset + compressed_size - 1) / QCOW2_COMPRESSED_SECTOR_SIZE -
        (cluster_offset / QCOW2_COMPRESSED_SECTOR_SIZE);

    /* The offset and size must fit in their fields of the L2 table entry */
    assert((cluster_offset & s->cluster_offset_mask) == cluster_offset);
    assert((nb_csectors & s->csize_mask) == nb_csectors);

    cluster_offset |= QCOW_OFLAG_COMPRESSED |
                      ((uint64_t)nb_csectors << s->csize_shift);

    /* update L2 table */

    /* compressed clusters never have the copied flag */

    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
    set_l2_entry(s, l2_slice, l2_index, cluster_offset);
    if (has_subclusters(s)) {
        set_l2_bitmap(s, l2_slice, l2_index, 0);
    }
    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    *host_offset = cluster_offset & s->cluster_offset_mask;
    return 0;
}

static int coroutine_fn perform_cow(BlockDriverState *bs, QCowL2Meta *m)
{
    BDRVQcow2State *s = bs->opaque;
    Qcow2COWRegion *start = &m->cow_start;
    Qcow2COWRegion *end = &m->cow_end;
    unsigned buffer_size;
    unsigned data_bytes = end->offset - (start->offset + start->nb_bytes);
    bool merge_reads;
    uint8_t *start_buffer, *end_buffer;
    QEMUIOVector qiov;
    int ret;

    assert(start->nb_bytes <= UINT_MAX - end->nb_bytes);
    assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes);
    assert(start->offset + start->nb_bytes <= end->offset);

    if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) {
        return 0;
    }

    /* If we have to read both the start and end COW regions and the
     * middle region is not too large then perform just one read
     * operation */
    merge_reads = start->nb_bytes && end->nb_bytes && data_bytes <= 16384;
    if (merge_reads) {
        buffer_size = start->nb_bytes + data_bytes + end->nb_bytes;
    } else {
        /* If we have to do two reads, add some padding in the middle
         * if necessary to make sure that the end region is optimally
         * aligned. */
        size_t align = bdrv_opt_mem_align(bs);
        assert(align > 0 && align <= UINT_MAX);
        assert(QEMU_ALIGN_UP(start->nb_bytes, align) <=
               UINT_MAX - end->nb_bytes);
        buffer_size = QEMU_ALIGN_UP(start->nb_bytes, align) + end->nb_bytes;
    }

    /* Reserve a buffer large enough to store all the data that we're
     * going to read */
    start_buffer = qemu_try_blockalign(bs, buffer_size);
    if (start_buffer == NULL) {
        return -ENOMEM;
    }
    /* The part of the buffer where the end region is located */
    end_buffer = start_buffer + buffer_size - end->nb_bytes;

    qemu_iovec_init(&qiov, 2 + (m->data_qiov ?
                                qemu_iovec_subvec_niov(m->data_qiov,
                                                       m->data_qiov_offset,
                                                       data_bytes)
                                : 0));

    qemu_co_mutex_unlock(&s->lock);
    /* First we read the existing data from both COW regions. We
     * either read the whole region in one go, or the start and end
     * regions separately.
     */
    if (merge_reads) {
        qemu_iovec_add(&qiov, start_buffer, buffer_size);
        ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
    } else {
        qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
        ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
        if (ret < 0) {
            goto fail;
        }

        qemu_iovec_reset(&qiov);
        qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
        ret = do_perform_cow_read(bs, m->offset, end->offset, &qiov);
    }
    if (ret < 0) {
        goto fail;
    }

    /* Encrypt the data if necessary before writing it */
    if (bs->encrypted) {
        ret = qcow2_co_encrypt(bs,
                               m->alloc_offset + start->offset,
                               m->offset + start->offset,
                               start_buffer, start->nb_bytes);
        if (ret < 0) {
            goto fail;
        }

        ret = qcow2_co_encrypt(bs,
                               m->alloc_offset + end->offset,
                               m->offset + end->offset,
                               end_buffer, end->nb_bytes);
        if (ret < 0) {
            goto fail;
        }
    }

    /* And now we can write everything. If we have the guest data we
     * can write everything in one single operation */
    if (m->data_qiov) {
        qemu_iovec_reset(&qiov);
        if (start->nb_bytes) {
            qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
        }
        qemu_iovec_concat(&qiov, m->data_qiov, m->data_qiov_offset, data_bytes);
        if (end->nb_bytes) {
            qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
        }
        /* NOTE: we have a write_aio blkdebug event here followed by
         * a cow_write one in do_perform_cow_write(), but there's only
         * one single I/O operation */
        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
        ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
    } else {
        /* If there's no guest data then write both COW regions separately */
        qemu_iovec_reset(&qiov);
        qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
        ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
        if (ret < 0) {
            goto fail;
        }

        qemu_iovec_reset(&qiov);
        qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
        ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov);
    }

fail:
    qemu_co_mutex_lock(&s->lock);

    /*
     * Before we update the L2 table to actually point to the new cluster, we
     * need to be sure that the refcounts have been increased and COW was
     * handled.
     */
    if (ret == 0) {
        qcow2_cache_depends_on_flush(s->l2_table_cache);
    }

    qemu_vfree(start_buffer);
    qemu_iovec_destroy(&qiov);
    return ret;
}

int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
                                             QCowL2Meta *m)
{
    BDRVQcow2State *s = bs->opaque;
    int i, j = 0, l2_index, ret;
    uint64_t *old_cluster, *l2_slice;
    uint64_t cluster_offset = m->alloc_offset;

    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
    assert(m->nb_clusters > 0);

    old_cluster = g_try_new(uint64_t, m->nb_clusters);
    if (old_cluster == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    /* copy content of unmodified sectors */
    ret = perform_cow(bs, m);
    if (ret < 0) {
        goto err;
    }

    /* Update L2 table. */
    if (s->use_lazy_refcounts) {
        qcow2_mark_dirty(bs);
    }
    if (qcow2_need_accurate_refcounts(s)) {
        qcow2_cache_set_dependency(bs, s->l2_table_cache,
                                   s->refcount_block_cache);
    }

    ret = get_cluster_table(bs, m->offset, &l2_slice, &l2_index);
    if (ret < 0) {
        goto err;
    }
    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);

    assert(l2_index + m->nb_clusters <= s->l2_slice_size);
    assert(m->cow_end.offset + m->cow_end.nb_bytes <=
           m->nb_clusters << s->cluster_bits);
    for (i = 0; i < m->nb_clusters; i++) {
        uint64_t offset = cluster_offset + ((uint64_t)i << s->cluster_bits);
        /* If two concurrent writes happen to the same unallocated cluster,
         * each write allocates a separate cluster and writes data
         * concurrently. The first one to complete updates the L2 table with
         * a pointer to its cluster; the second one has to do RMW (which is
         * done above by perform_cow()), update the L2 table with its cluster
         * pointer and free the old cluster. This is what this loop does. */
        if (get_l2_entry(s, l2_slice, l2_index + i) != 0) {
            old_cluster[j++] = get_l2_entry(s, l2_slice, l2_index + i);
        }

        /* The offset must fit in the offset field of the L2 table entry */
        assert((offset & L2E_OFFSET_MASK) == offset);

        set_l2_entry(s, l2_slice, l2_index + i, offset | QCOW_OFLAG_COPIED);

        /* Update bitmap with the subclusters that were just written */
        if (has_subclusters(s) && !m->prealloc) {
            uint64_t l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
            unsigned written_from = m->cow_start.offset;
            unsigned written_to = m->cow_end.offset + m->cow_end.nb_bytes;
            int first_sc, last_sc;
            /* Narrow written_from and written_to down to the current cluster */
            written_from = MAX(written_from, i << s->cluster_bits);
            written_to = MIN(written_to, (i + 1) << s->cluster_bits);
            assert(written_from < written_to);
            first_sc = offset_to_sc_index(s, written_from);
            last_sc = offset_to_sc_index(s, written_to - 1);
            l2_bitmap |= QCOW_OFLAG_SUB_ALLOC_RANGE(first_sc, last_sc + 1);
            l2_bitmap &= ~QCOW_OFLAG_SUB_ZERO_RANGE(first_sc, last_sc + 1);
            set_l2_bitmap(s, l2_slice, l2_index + i, l2_bitmap);
        }
    }


    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    /*
     * If this was a COW, we need to decrease the refcount of the old cluster.
     *
     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
     * clusters), the next write will reuse them anyway.
     */
    if (!m->keep_old_clusters && j != 0) {
        for (i = 0; i < j; i++) {
            qcow2_free_any_cluster(bs, old_cluster[i], QCOW2_DISCARD_NEVER);
        }
    }

    ret = 0;
err:
    g_free(old_cluster);
    return ret;
}

/**
 * Frees the allocated clusters because the request failed and they won't
 * actually be linked.
 */
void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
{
    BDRVQcow2State *s = bs->opaque;
    if (!has_data_file(bs) && !m->keep_old_clusters) {
        qcow2_free_clusters(bs, m->alloc_offset,
                            m->nb_clusters << s->cluster_bits,
                            QCOW2_DISCARD_NEVER);
    }
}

/*
 * For a given write request, create a new QCowL2Meta structure, add
 * it to @m and the BDRVQcow2State.cluster_allocs list. If the write
 * request does not need copy-on-write or changes to the L2 metadata
 * then this function does nothing.
 *
 * @host_cluster_offset points to the beginning of the first cluster.
 *
 * @guest_offset and @bytes indicate the offset and length of the
 * request.
 *
 * @l2_slice contains the L2 entries of all clusters involved in this
 * write request.
 *
 * If @keep_old is true it means that the clusters were already
 * allocated and will be overwritten. If false then the clusters are
 * new and we have to decrease the reference count of the old ones.
 *
 * Returns 0 on success, -errno on failure.
 */
static int calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset,
                             uint64_t guest_offset, unsigned bytes,
                             uint64_t *l2_slice, QCowL2Meta **m, bool keep_old)
{
    BDRVQcow2State *s = bs->opaque;
    int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset);
    uint64_t l2_entry, l2_bitmap;
    unsigned cow_start_from, cow_end_to;
    unsigned cow_start_to = offset_into_cluster(s, guest_offset);
    unsigned cow_end_from = cow_start_to + bytes;
    unsigned nb_clusters = size_to_clusters(s, cow_end_from);
    QCowL2Meta *old_m = *m;
    QCow2SubclusterType type;
    int i;
    bool skip_cow = keep_old;

    assert(nb_clusters <= s->l2_slice_size - l2_index);

    /* Check the type of all affected subclusters */
    for (i = 0; i < nb_clusters; i++) {
        l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
        l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
        if (skip_cow) {
            unsigned write_from = MAX(cow_start_to, i << s->cluster_bits);
            unsigned write_to = MIN(cow_end_from, (i + 1) << s->cluster_bits);
            int first_sc = offset_to_sc_index(s, write_from);
            int last_sc = offset_to_sc_index(s, write_to - 1);
            int cnt = qcow2_get_subcluster_range_type(bs, l2_entry, l2_bitmap,
                                                      first_sc, &type);
            /* Is any of the subclusters of type != QCOW2_SUBCLUSTER_NORMAL ? */
            if (type != QCOW2_SUBCLUSTER_NORMAL || first_sc + cnt <= last_sc) {
                skip_cow = false;
            }
        } else {
            /* If we can't skip the cow we can still look for invalid entries */
            type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, 0);
        }
        if (type == QCOW2_SUBCLUSTER_INVALID) {
            int l1_index = offset_to_l1_index(s, guest_offset);
            uint64_t l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
            qcow2_signal_corruption(bs, true, -1, -1, "Invalid cluster "
                                    "entry found (L2 offset: %#" PRIx64
                                    ", L2 index: %#x)",
                                    l2_offset, l2_index + i);
            return -EIO;
        }
    }

    if (skip_cow) {
        return 0;
    }

    /* Get the L2 entry of the first cluster */
    l2_entry = get_l2_entry(s, l2_slice, l2_index);
    l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
    sc_index = offset_to_sc_index(s, guest_offset);
    type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);

    if (!keep_old) {
        switch (type) {
        case QCOW2_SUBCLUSTER_COMPRESSED:
            cow_start_from = 0;
            break;
        case QCOW2_SUBCLUSTER_NORMAL:
        case QCOW2_SUBCLUSTER_ZERO_ALLOC:
        case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
            if (has_subclusters(s)) {
                /* Skip all leading zero and unallocated subclusters */
                uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC;
                cow_start_from =
                    MIN(sc_index, ctz32(alloc_bitmap)) << s->subcluster_bits;
            } else {
                cow_start_from = 0;
            }
            break;
        case QCOW2_SUBCLUSTER_ZERO_PLAIN:
        case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
            cow_start_from = sc_index << s->subcluster_bits;
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        switch (type) {
        case QCOW2_SUBCLUSTER_NORMAL:
            cow_start_from = cow_start_to;
            break;
        case QCOW2_SUBCLUSTER_ZERO_ALLOC:
        case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
            cow_start_from = sc_index << s->subcluster_bits;
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Get the L2 entry of the last cluster */
    l2_index += nb_clusters - 1;
    l2_entry = get_l2_entry(s, l2_slice, l2_index);
    l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);
    sc_index = offset_to_sc_index(s, guest_offset + bytes - 1);
    type = qcow2_get_subcluster_type(bs, l2_entry, l2_bitmap, sc_index);

    if (!keep_old) {
        switch (type) {
        case QCOW2_SUBCLUSTER_COMPRESSED:
            cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);
            break;
        case QCOW2_SUBCLUSTER_NORMAL:
        case QCOW2_SUBCLUSTER_ZERO_ALLOC:
        case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
            cow_end_to = ROUND_UP(cow_end_from, s->cluster_size);
            if (has_subclusters(s)) {
                /* Skip all trailing zero and unallocated subclusters */
                uint32_t alloc_bitmap = l2_bitmap & QCOW_L2_BITMAP_ALL_ALLOC;
                cow_end_to -=
                    MIN(s->subclusters_per_cluster - sc_index - 1,
                        clz32(alloc_bitmap)) << s->subcluster_bits;
            }
            break;
        case QCOW2_SUBCLUSTER_ZERO_PLAIN:
        case QCOW2_SUBCLUSTER_UNALLOCATED_PLAIN:
            cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        switch (type) {
        case QCOW2_SUBCLUSTER_NORMAL:
            cow_end_to = cow_end_from;
            break;
        case QCOW2_SUBCLUSTER_ZERO_ALLOC:
        case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
            cow_end_to = ROUND_UP(cow_end_from, s->subcluster_size);
            break;
        default:
            g_assert_not_reached();
        }
    }
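
    /*
     * Rough worked example (an illustration only, assuming 64 KiB clusters
     * and no extended L2 entries, i.e. a single subcluster per cluster):
     * a 4 KiB write at 8 KiB into an unallocated cluster with
     * keep_old == false yields cow_start = { .offset = 0, .nb_bytes = 8 KiB }
     * and cow_end = { .offset = 12 KiB, .nb_bytes = 52 KiB }, i.e. the two
     * regions around the written bytes that perform_cow() later fills with
     * the previous contents (or zeroes/backing data).
     */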

    *m = g_malloc0(sizeof(**m));
    **m = (QCowL2Meta) {
        .next           = old_m,

        .alloc_offset   = host_cluster_offset,
        .offset         = start_of_cluster(s, guest_offset),
        .nb_clusters    = nb_clusters,

        .keep_old_clusters = keep_old,

        .cow_start = {
            .offset     = cow_start_from,
            .nb_bytes   = cow_start_to - cow_start_from,
        },
        .cow_end = {
            .offset     = cow_end_from,
            .nb_bytes   = cow_end_to - cow_end_from,
        },
    };

    qemu_co_queue_init(&(*m)->dependent_requests);
    QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);

    return 0;
}

/*
 * Returns true if writing to the cluster pointed to by @l2_entry
 * requires a new allocation (that is, if the cluster is unallocated
 * or has refcount > 1 and therefore cannot be written in-place).
 */
static bool cluster_needs_new_alloc(BlockDriverState *bs, uint64_t l2_entry)
{
    switch (qcow2_get_cluster_type(bs, l2_entry)) {
    case QCOW2_CLUSTER_NORMAL:
    case QCOW2_CLUSTER_ZERO_ALLOC:
        if (l2_entry & QCOW_OFLAG_COPIED) {
            return false;
        }
        /* fallthrough */
    case QCOW2_CLUSTER_UNALLOCATED:
    case QCOW2_CLUSTER_COMPRESSED:
    case QCOW2_CLUSTER_ZERO_PLAIN:
        return true;
    default:
        abort();
    }
}

/*
 * Returns the number of contiguous clusters that can be written to
 * using one single write request, starting from @l2_index.
 * At most @nb_clusters are checked.
 *
 * If @new_alloc is true this counts clusters that are either
 * unallocated, or allocated but with refcount > 1 (so they need to be
 * newly allocated and COWed).
 *
 * If @new_alloc is false this counts clusters that are already
 * allocated and can be overwritten in-place (this includes clusters
 * of type QCOW2_CLUSTER_ZERO_ALLOC).
 */
static int count_single_write_clusters(BlockDriverState *bs, int nb_clusters,
                                       uint64_t *l2_slice, int l2_index,
                                       bool new_alloc)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t l2_entry = get_l2_entry(s, l2_slice, l2_index);
    uint64_t expected_offset = l2_entry & L2E_OFFSET_MASK;
    int i;

    for (i = 0; i < nb_clusters; i++) {
        l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
        if (cluster_needs_new_alloc(bs, l2_entry) != new_alloc) {
            break;
        }
        if (!new_alloc) {
            if (expected_offset != (l2_entry & L2E_OFFSET_MASK)) {
                break;
            }
            expected_offset += s->cluster_size;
        }
    }

    assert(i <= nb_clusters);
    return i;
}

/*
 * Check if there already is an AIO write request in flight which allocates
 * the same cluster. In this case we need to wait until the previous
 * request has completed and updated the L2 table accordingly.
 *
 * Returns:
 *   0       if there was no dependency. *cur_bytes indicates the number of
 *           bytes from guest_offset that can be read before the next
 *           dependency must be processed (or the request is complete)
 *
 *   -EAGAIN if we had to wait for another request, previously gathered
 *           information on cluster allocation may be invalid now. The caller
 *           must start over anyway, so consider *cur_bytes undefined.
 */
static int coroutine_fn handle_dependencies(BlockDriverState *bs,
                                            uint64_t guest_offset,
                                            uint64_t *cur_bytes, QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    QCowL2Meta *old_alloc;
    uint64_t bytes = *cur_bytes;

    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {

        uint64_t start = guest_offset;
        uint64_t end = start + bytes;
        uint64_t old_start = start_of_cluster(s, l2meta_cow_start(old_alloc));
        uint64_t old_end = ROUND_UP(l2meta_cow_end(old_alloc), s->cluster_size);

        if (end <= old_start || start >= old_end) {
            /* No intersection */
            continue;
        }

        if (old_alloc->keep_old_clusters &&
            (end <= l2meta_cow_start(old_alloc) ||
             start >= l2meta_cow_end(old_alloc)))
        {
            /*
             * Clusters intersect but COW areas don't. And cluster itself is
             * already allocated. So, there is no actual conflict.
             */
            continue;
        }

        /* Conflict */

        if (start < old_start) {
            /* Stop at the start of a running allocation */
            bytes = old_start - start;
        } else {
            bytes = 0;
        }

        /*
         * Stop if an l2meta already exists. After yielding, it wouldn't
         * be valid any more, so we'd have to clean up the old L2Metas
         * and deal with requests depending on them before starting to
         * gather new ones. Not worth the trouble.
         */
        if (bytes == 0 && *m) {
            *cur_bytes = 0;
            return 0;
        }

        if (bytes == 0) {
            /*
             * Wait for the dependency to complete. We need to recheck
             * the free/allocated clusters when we continue.
             */
            qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
            return -EAGAIN;
        }
    }

    /* Make sure that existing clusters and new allocations are only used up to
     * the next dependency if we shortened the request above */
    *cur_bytes = bytes;

    return 0;
}

/*
 * Check how many clusters starting at the given guest_offset are already
 * allocated and don't require a new allocation (up to *bytes).
 * If *host_offset is not INV_OFFSET, only physically contiguous clusters
 * beginning at this host offset are counted.
 *
 * Note that guest_offset may not be cluster aligned. In this case, the
 * returned *host_offset points to the exact byte referenced by guest_offset
 * and therefore isn't cluster aligned either.
 *
 * Returns:
 *   0:     if no allocated clusters are available at the given offset.
 *          *bytes is normally unchanged. It is set to 0 if the cluster
 *          is allocated and can be overwritten in-place but doesn't have
 *          the right physical offset.
 *
 *   1:     if allocated clusters that can be overwritten in place are
 *          available at the requested offset. *bytes may have decreased
 *          and describes the length of the area that can be written to.
1490 * 1491 * -errno: in error cases 1492 */ 1493 static int coroutine_fn handle_copied(BlockDriverState *bs, 1494 uint64_t guest_offset, uint64_t *host_offset, uint64_t *bytes, 1495 QCowL2Meta **m) 1496 { 1497 BDRVQcow2State *s = bs->opaque; 1498 int l2_index; 1499 uint64_t l2_entry, cluster_offset; 1500 uint64_t *l2_slice; 1501 uint64_t nb_clusters; 1502 unsigned int keep_clusters; 1503 int ret; 1504 1505 trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset, 1506 *bytes); 1507 1508 assert(*host_offset == INV_OFFSET || offset_into_cluster(s, guest_offset) 1509 == offset_into_cluster(s, *host_offset)); 1510 1511 /* 1512 * Calculate the number of clusters to look for. We stop at L2 slice 1513 * boundaries to keep things simple. 1514 */ 1515 nb_clusters = 1516 size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); 1517 1518 l2_index = offset_to_l2_slice_index(s, guest_offset); 1519 nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index); 1520 /* Limit total byte count to BDRV_REQUEST_MAX_BYTES */ 1521 nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits); 1522 1523 /* Find L2 entry for the first involved cluster */ 1524 ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index); 1525 if (ret < 0) { 1526 return ret; 1527 } 1528 1529 l2_entry = get_l2_entry(s, l2_slice, l2_index); 1530 cluster_offset = l2_entry & L2E_OFFSET_MASK; 1531 1532 if (!cluster_needs_new_alloc(bs, l2_entry)) { 1533 if (offset_into_cluster(s, cluster_offset)) { 1534 qcow2_signal_corruption(bs, true, -1, -1, "%s cluster offset " 1535 "%#" PRIx64 " unaligned (guest offset: %#" 1536 PRIx64 ")", l2_entry & QCOW_OFLAG_ZERO ? 1537 "Preallocated zero" : "Data", 1538 cluster_offset, guest_offset); 1539 ret = -EIO; 1540 goto out; 1541 } 1542 1543 /* If a specific host_offset is required, check it */ 1544 if (*host_offset != INV_OFFSET && cluster_offset != *host_offset) { 1545 *bytes = 0; 1546 ret = 0; 1547 goto out; 1548 } 1549 1550 /* We keep all QCOW_OFLAG_COPIED clusters */ 1551 keep_clusters = count_single_write_clusters(bs, nb_clusters, l2_slice, 1552 l2_index, false); 1553 assert(keep_clusters <= nb_clusters); 1554 1555 *bytes = MIN(*bytes, 1556 keep_clusters * s->cluster_size 1557 - offset_into_cluster(s, guest_offset)); 1558 assert(*bytes != 0); 1559 1560 ret = calculate_l2_meta(bs, cluster_offset, guest_offset, 1561 *bytes, l2_slice, m, true); 1562 if (ret < 0) { 1563 goto out; 1564 } 1565 1566 ret = 1; 1567 } else { 1568 ret = 0; 1569 } 1570 1571 /* Cleanup */ 1572 out: 1573 qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice); 1574 1575 /* Only return a host offset if we actually made progress. Otherwise we 1576 * would make requirements for handle_alloc() that it can't fulfill */ 1577 if (ret > 0) { 1578 *host_offset = cluster_offset + offset_into_cluster(s, guest_offset); 1579 } 1580 1581 return ret; 1582 } 1583 1584 /* 1585 * Allocates new clusters for the given guest_offset. 1586 * 1587 * At most *nb_clusters are allocated, and on return *nb_clusters is updated to 1588 * contain the number of clusters that have been allocated and are contiguous 1589 * in the image file. 1590 * 1591 * If *host_offset is not INV_OFFSET, it specifies the offset in the image file 1592 * at which the new clusters must start. *nb_clusters can be 0 on return in 1593 * this case if the cluster at host_offset is already in use. If *host_offset 1594 * is INV_OFFSET, the clusters can be allocated anywhere in the image file. 
1595 * 1596 * *host_offset is updated to contain the offset into the image file at which 1597 * the first allocated cluster starts. 1598 * 1599 * Return 0 on success and -errno in error cases. -EAGAIN means that the 1600 * function has been waiting for another request and the allocation must be 1601 * restarted, but the whole request should not be failed. 1602 */ 1603 static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, 1604 uint64_t *host_offset, uint64_t *nb_clusters) 1605 { 1606 BDRVQcow2State *s = bs->opaque; 1607 1608 trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, 1609 *host_offset, *nb_clusters); 1610 1611 if (has_data_file(bs)) { 1612 assert(*host_offset == INV_OFFSET || 1613 *host_offset == start_of_cluster(s, guest_offset)); 1614 *host_offset = start_of_cluster(s, guest_offset); 1615 return 0; 1616 } 1617 1618 /* Allocate new clusters */ 1619 trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); 1620 if (*host_offset == INV_OFFSET) { 1621 int64_t cluster_offset = 1622 qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size); 1623 if (cluster_offset < 0) { 1624 return cluster_offset; 1625 } 1626 *host_offset = cluster_offset; 1627 return 0; 1628 } else { 1629 int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); 1630 if (ret < 0) { 1631 return ret; 1632 } 1633 *nb_clusters = ret; 1634 return 0; 1635 } 1636 } 1637 1638 /* 1639 * Allocates new clusters for an area that is either still unallocated or 1640 * cannot be overwritten in-place. If *host_offset is not INV_OFFSET, 1641 * clusters are only allocated if the new allocation can match the specified 1642 * host offset. 1643 * 1644 * Note that guest_offset may not be cluster aligned. In this case, the 1645 * returned *host_offset points to exact byte referenced by guest_offset and 1646 * therefore isn't cluster aligned as well. 1647 * 1648 * Returns: 1649 * 0: if no clusters could be allocated. *bytes is set to 0, 1650 * *host_offset is left unchanged. 1651 * 1652 * 1: if new clusters were allocated. *bytes may be decreased if the 1653 * new allocation doesn't cover all of the requested area. 1654 * *host_offset is updated to contain the host offset of the first 1655 * newly allocated cluster. 1656 * 1657 * -errno: in error cases 1658 */ 1659 static int coroutine_fn handle_alloc(BlockDriverState *bs, 1660 uint64_t guest_offset, uint64_t *host_offset, uint64_t *bytes, 1661 QCowL2Meta **m) 1662 { 1663 BDRVQcow2State *s = bs->opaque; 1664 int l2_index; 1665 uint64_t *l2_slice; 1666 uint64_t nb_clusters; 1667 int ret; 1668 1669 uint64_t alloc_cluster_offset; 1670 1671 trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset, 1672 *bytes); 1673 assert(*bytes > 0); 1674 1675 /* 1676 * Calculate the number of clusters to look for. We stop at L2 slice 1677 * boundaries to keep things simple. 
     */
    nb_clusters =
        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);

    l2_index = offset_to_l2_slice_index(s, guest_offset);
    nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
    /* Limit total allocation byte count to BDRV_REQUEST_MAX_BYTES */
    nb_clusters = MIN(nb_clusters, BDRV_REQUEST_MAX_BYTES >> s->cluster_bits);

    /* Find L2 entry for the first involved cluster */
    ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
    if (ret < 0) {
        return ret;
    }

    nb_clusters = count_single_write_clusters(bs, nb_clusters,
                                              l2_slice, l2_index, true);

    /* This function is only called when there were no non-COW clusters, so if
     * we can't find any unallocated or COW clusters either, something is
     * wrong with our code. */
    assert(nb_clusters > 0);

    /* Allocate at a given offset in the image file */
    alloc_cluster_offset = *host_offset == INV_OFFSET ? INV_OFFSET :
        start_of_cluster(s, *host_offset);
    ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
                                  &nb_clusters);
    if (ret < 0) {
        goto out;
    }

    /* Can't extend contiguous allocation */
    if (nb_clusters == 0) {
        *bytes = 0;
        ret = 0;
        goto out;
    }

    assert(alloc_cluster_offset != INV_OFFSET);

    /*
     * Save info needed for meta data update.
     *
     * requested_bytes: Number of bytes from the start of the first
     * newly allocated cluster to the end of the (possibly shortened
     * before) write request.
     *
     * avail_bytes: Number of bytes from the start of the first
     * newly allocated to the end of the last newly allocated cluster.
     *
     * nb_bytes: The number of bytes from the start of the first
     * newly allocated cluster to the end of the area that the write
     * request actually writes to (excluding COW at the end)
     */
    uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset);
    int avail_bytes = nb_clusters << s->cluster_bits;
    int nb_bytes = MIN(requested_bytes, avail_bytes);

    *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
    *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset));
    assert(*bytes != 0);

    ret = calculate_l2_meta(bs, alloc_cluster_offset, guest_offset, *bytes,
                            l2_slice, m, false);
    if (ret < 0) {
        goto out;
    }

    ret = 1;

out:
    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
    return ret;
}

/*
 * For a given area on the virtual disk defined by @offset and @bytes,
 * find the corresponding area on the qcow2 image, allocating new
 * clusters (or subclusters) if necessary. The result can span a
 * combination of allocated and previously unallocated clusters.
 *
 * Note that offset may not be cluster aligned. In this case, the returned
 * *host_offset points to the exact byte referenced by offset and therefore
 * isn't cluster aligned either.
 *
 * On return, @host_offset is set to the beginning of the requested
 * area. This area is guaranteed to be contiguous on the qcow2 file
 * but it can be smaller than initially requested. In this case @bytes
 * is updated with the actual size.
 *
 * If any clusters or subclusters were allocated then @m contains a
 * list with the information of all the affected regions. Note that
 * Note that this can happen regardless of whether this function succeeds
 * or not. The caller is responsible for updating the L2 metadata of the
 * allocated clusters (on success) or freeing them (on failure), and
 * for clearing the contents of @m afterwards in both cases.
 *
 * If the request conflicts with another write request in flight, the coroutine
 * is queued and will be reentered when the dependency has completed.
 *
 * Return 0 on success and -errno in error cases
 */
int coroutine_fn qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset,
                                         unsigned int *bytes,
                                         uint64_t *host_offset,
                                         QCowL2Meta **m)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t start, remaining;
    uint64_t cluster_offset;
    uint64_t cur_bytes;
    int ret;

    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *bytes);

again:
    start = offset;
    remaining = *bytes;
    cluster_offset = INV_OFFSET;
    *host_offset = INV_OFFSET;
    cur_bytes = 0;
    *m = NULL;

    while (true) {

        if (*host_offset == INV_OFFSET && cluster_offset != INV_OFFSET) {
            *host_offset = cluster_offset;
        }

        assert(remaining >= cur_bytes);

        start += cur_bytes;
        remaining -= cur_bytes;

        if (cluster_offset != INV_OFFSET) {
            cluster_offset += cur_bytes;
        }

        if (remaining == 0) {
            break;
        }

        cur_bytes = remaining;

        /*
         * Now start gathering as many contiguous clusters as possible:
         *
         * 1. Check for overlaps with in-flight allocations
         *
         *    a) Overlap not in the first cluster -> shorten this request and
         *       let the caller handle the rest in its next loop iteration.
         *
         *    b) Real overlaps of two requests. Yield and restart the search
         *       for contiguous clusters (the situation could have changed
         *       while we were sleeping)
         *
         *    c) TODO: Request starts in the same cluster as the in-flight
         *       allocation ends. Shorten the COW of the in-flight allocation,
         *       set cluster_offset to write to the same cluster and set up
         *       the right synchronisation between the in-flight request and
         *       the new one.
         */
        ret = handle_dependencies(bs, start, &cur_bytes, m);
        if (ret == -EAGAIN) {
            /* Currently handle_dependencies() doesn't yield if we already had
             * an allocation. If it did, we would have to clean up the L2Meta
             * structs before starting over. */
            assert(*m == NULL);
            goto again;
        } else if (ret < 0) {
            return ret;
        } else if (cur_bytes == 0) {
            break;
        } else {
            /* handle_dependencies() may have decreased cur_bytes (shortened
             * the allocations below) so that the next dependency is processed
             * correctly during the next loop iteration. */
        }

        /*
         * 2. Count contiguous COPIED clusters.
         */
        ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            continue;
        } else if (cur_bytes == 0) {
            break;
        }

        /*
         * 3. If the request still hasn't completed, allocate new clusters,
         *    considering any cluster_offset of steps 1c or 2.
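         *
         *    (handle_alloc() stops at L2 slice boundaries, so even without
         *    conflicting requests this loop may need several iterations to
         *    cover a large request.)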
         */
        ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            continue;
        } else {
            assert(cur_bytes == 0);
            break;
        }
    }

    *bytes -= remaining;
    assert(*bytes > 0);
    assert(*host_offset != INV_OFFSET);
    assert(offset_into_cluster(s, *host_offset) ==
           offset_into_cluster(s, offset));

    return 0;
}

/*
 * This discards as many clusters of nb_clusters as possible at once (i.e.
 * all clusters in the same L2 slice) and returns the number of discarded
 * clusters.
 */
static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
                               uint64_t nb_clusters,
                               enum qcow2_discard_type type, bool full_discard)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *l2_slice;
    int l2_index;
    int ret;
    int i;

    ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
    if (ret < 0) {
        return ret;
    }

    /* Limit nb_clusters to one L2 slice */
    nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
    assert(nb_clusters <= INT_MAX);

    for (i = 0; i < nb_clusters; i++) {
        uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
        uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
        uint64_t new_l2_entry = old_l2_entry;
        uint64_t new_l2_bitmap = old_l2_bitmap;
        QCow2ClusterType cluster_type =
            qcow2_get_cluster_type(bs, old_l2_entry);

        /*
         * If full_discard is true, the cluster should not read back as zeroes,
         * but rather fall through to the backing file.
         *
         * If full_discard is false, make sure that a discarded area reads back
         * as zeroes for v3 images (we cannot do it for v2 without actually
         * writing a zero-filled buffer). We can skip the operation if the
         * cluster is already marked as zero, or if it's unallocated and we
         * don't have a backing file.
         *
         * TODO We might want to use bdrv_block_status(bs) here, but we're
         * holding s->lock, so that doesn't work today.
         */
        if (full_discard) {
            new_l2_entry = new_l2_bitmap = 0;
        } else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) {
            if (has_subclusters(s)) {
                new_l2_entry = 0;
                new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
            } else {
                new_l2_entry = s->qcow_version >= 3 ? QCOW_OFLAG_ZERO : 0;
            }
        }

        if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) {
            continue;
        }

        /* First remove L2 entries */
        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
        set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry);
        if (has_subclusters(s)) {
            set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
        }
        /* Then decrease the refcount */
        qcow2_free_any_cluster(bs, old_l2_entry, type);
    }

    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    return nb_clusters;
}

int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, enum qcow2_discard_type type,
                          bool full_discard)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t end_offset = offset + bytes;
    uint64_t nb_clusters;
    int64_t cleared;
    int ret;

    /* Caller must pass aligned values, except at image end */
    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
    assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
           end_offset == bs->total_sectors << BDRV_SECTOR_BITS);

    nb_clusters = size_to_clusters(s, bytes);

    s->cache_discards = true;

    /* Each L2 slice is handled by its own loop iteration */
    while (nb_clusters > 0) {
        cleared = discard_in_l2_slice(bs, offset, nb_clusters, type,
                                      full_discard);
        if (cleared < 0) {
            ret = cleared;
            goto fail;
        }

        nb_clusters -= cleared;
        offset += (cleared * s->cluster_size);
    }

    ret = 0;
fail:
    s->cache_discards = false;
    qcow2_process_discards(bs, ret);

    return ret;
}

/*
 * This zeroes as many clusters of nb_clusters as possible at once (i.e.
 * all clusters in the same L2 slice) and returns the number of zeroed
 * clusters.
 */
static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
                            uint64_t nb_clusters, int flags)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *l2_slice;
    int l2_index;
    int ret;
    int i;

    ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
    if (ret < 0) {
        return ret;
    }

    /* Limit nb_clusters to one L2 slice */
    nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
    assert(nb_clusters <= INT_MAX);

    for (i = 0; i < nb_clusters; i++) {
        uint64_t old_l2_entry = get_l2_entry(s, l2_slice, l2_index + i);
        uint64_t old_l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index + i);
        QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry);
        bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) ||
            ((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type));
        uint64_t new_l2_entry = unmap ? 0 : old_l2_entry;
        uint64_t new_l2_bitmap = old_l2_bitmap;

        if (has_subclusters(s)) {
            new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
        } else {
            new_l2_entry |= QCOW_OFLAG_ZERO;
        }

        if (old_l2_entry == new_l2_entry && old_l2_bitmap == new_l2_bitmap) {
            continue;
        }

        /* First update L2 entries */
        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
        set_l2_entry(s, l2_slice, l2_index + i, new_l2_entry);
        if (has_subclusters(s)) {
            set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
        }

        /* Then decrease the refcount */
        if (unmap) {
            qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
        }
    }

    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    return nb_clusters;
}

static int zero_l2_subclusters(BlockDriverState *bs, uint64_t offset,
                               unsigned nb_subclusters)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *l2_slice;
    uint64_t old_l2_bitmap, l2_bitmap;
    int l2_index, ret, sc = offset_to_sc_index(s, offset);

    /* For full clusters use zero_in_l2_slice() instead */
    assert(nb_subclusters > 0 && nb_subclusters < s->subclusters_per_cluster);
    assert(sc + nb_subclusters <= s->subclusters_per_cluster);
    assert(offset_into_subcluster(s, offset) == 0);

    ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
    if (ret < 0) {
        return ret;
    }

    switch (qcow2_get_cluster_type(bs, get_l2_entry(s, l2_slice, l2_index))) {
    case QCOW2_CLUSTER_COMPRESSED:
        ret = -ENOTSUP; /* We cannot partially zeroize compressed clusters */
        goto out;
    case QCOW2_CLUSTER_NORMAL:
    case QCOW2_CLUSTER_UNALLOCATED:
        break;
    default:
        g_assert_not_reached();
    }

    old_l2_bitmap = l2_bitmap = get_l2_bitmap(s, l2_slice, l2_index);

    l2_bitmap |= QCOW_OFLAG_SUB_ZERO_RANGE(sc, sc + nb_subclusters);
    l2_bitmap &= ~QCOW_OFLAG_SUB_ALLOC_RANGE(sc, sc + nb_subclusters);

    if (old_l2_bitmap != l2_bitmap) {
        set_l2_bitmap(s, l2_slice, l2_index, l2_bitmap);
        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
    }

    ret = 0;
out:
    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);

    return ret;
}

int coroutine_fn qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset,
                                          uint64_t bytes, int flags)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t end_offset = offset + bytes;
    uint64_t nb_clusters;
    unsigned head, tail;
    int64_t cleared;
    int ret;

    /* If we have to stay in sync with an external data file, zero out
     * s->data_file first. */
    if (data_file_is_raw(bs)) {
        assert(has_data_file(bs));
        ret = bdrv_co_pwrite_zeroes(s->data_file, offset, bytes, flags);
        if (ret < 0) {
            return ret;
        }
    }

    /* Caller must pass aligned values, except at image end */
    assert(offset_into_subcluster(s, offset) == 0);
    assert(offset_into_subcluster(s, end_offset) == 0 ||
           end_offset >= bs->total_sectors << BDRV_SECTOR_BITS);

    /*
     * The zero flag is only supported by version 3 and newer. However, if we
     * have no backing file, we can resort to discard in version 2.
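     *
     * (Without a backing file an unallocated cluster already reads back as
     * zeroes, so falling back to qcow2_cluster_discard() gives the caller the
     * semantics it asked for.)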
     */
    if (s->qcow_version < 3) {
        if (!bs->backing) {
            return qcow2_cluster_discard(bs, offset, bytes,
                                         QCOW2_DISCARD_REQUEST, false);
        }
        return -ENOTSUP;
    }

    head = MIN(end_offset, ROUND_UP(offset, s->cluster_size)) - offset;
    offset += head;

    tail = (end_offset >= bs->total_sectors << BDRV_SECTOR_BITS) ? 0 :
        end_offset - MAX(offset, start_of_cluster(s, end_offset));
    end_offset -= tail;

    s->cache_discards = true;

    if (head) {
        ret = zero_l2_subclusters(bs, offset - head,
                                  size_to_subclusters(s, head));
        if (ret < 0) {
            goto fail;
        }
    }

    /* Each L2 slice is handled by its own loop iteration */
    nb_clusters = size_to_clusters(s, end_offset - offset);

    while (nb_clusters > 0) {
        cleared = zero_in_l2_slice(bs, offset, nb_clusters, flags);
        if (cleared < 0) {
            ret = cleared;
            goto fail;
        }

        nb_clusters -= cleared;
        offset += (cleared * s->cluster_size);
    }

    if (tail) {
        ret = zero_l2_subclusters(bs, end_offset, size_to_subclusters(s, tail));
        if (ret < 0) {
            goto fail;
        }
    }

    ret = 0;
fail:
    s->cache_discards = false;
    qcow2_process_discards(bs, ret);

    return ret;
}

/*
 * Expands all zero clusters in a specific L1 table (or deallocates them, for
 * non-backed non-pre-allocated zero clusters).
 *
 * l1_entries and *visited_l1_entries are used to keep track of progress for
 * status_cb(). l1_entries contains the total number of L1 entries and
 * *visited_l1_entries counts all visited L1 entries.
 */
static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                                      int l1_size, int64_t *visited_l1_entries,
                                      int64_t l1_entries,
                                      BlockDriverAmendStatusCB *status_cb,
                                      void *cb_opaque)
{
    BDRVQcow2State *s = bs->opaque;
    bool is_active_l1 = (l1_table == s->l1_table);
    uint64_t *l2_slice = NULL;
    unsigned slice, slice_size2, n_slices;
    int ret;
    int i, j;

    /* qcow2_downgrade() is not allowed in images with subclusters */
    assert(!has_subclusters(s));

    slice_size2 = s->l2_slice_size * l2_entry_size(s);
    n_slices = s->cluster_size / slice_size2;

    if (!is_active_l1) {
        /* inactive L2 tables require a local buffer into which to load
         * them from disk */
        l2_slice = qemu_try_blockalign(bs->file->bs, slice_size2);
        if (l2_slice == NULL) {
            return -ENOMEM;
        }
    }

    for (i = 0; i < l1_size; i++) {
        uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
        uint64_t l2_refcount;

        if (!l2_offset) {
            /* unallocated */
            (*visited_l1_entries)++;
            if (status_cb) {
                status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
            }
            continue;
        }

        if (offset_into_cluster(s, l2_offset)) {
            qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
                                    PRIx64 " unaligned (L1 index: %#x)",
                                    l2_offset, i);
            ret = -EIO;
            goto fail;
        }

        ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
                                 &l2_refcount);
        if (ret < 0) {
            goto fail;
        }

        for (slice = 0; slice < n_slices; slice++) {
            uint64_t slice_offset = l2_offset + slice * slice_size2;
            bool l2_dirty = false;
            if (is_active_l1) {
                /* get active L2 tables from cache */
                ret = qcow2_cache_get(bs, s->l2_table_cache, slice_offset,
                                      (void **) &l2_slice);
            } else {
                /* load inactive L2 tables from disk */
                ret = bdrv_pread(bs->file, slice_offset, slice_size2,
                                 l2_slice, 0);
            }
            if (ret < 0) {
                goto fail;
            }

            for (j = 0; j < s->l2_slice_size; j++) {
                uint64_t l2_entry = get_l2_entry(s, l2_slice, j);
                int64_t offset = l2_entry & L2E_OFFSET_MASK;
                QCow2ClusterType cluster_type =
                    qcow2_get_cluster_type(bs, l2_entry);

                if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
                    cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
                    continue;
                }

                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                    if (!bs->backing) {
                        /*
                         * not backed; therefore we can simply deallocate the
                         * cluster. No need to call set_l2_bitmap(), this
                         * function doesn't support images with subclusters.
                         */
                        set_l2_entry(s, l2_slice, j, 0);
                        l2_dirty = true;
                        continue;
                    }

                    offset = qcow2_alloc_clusters(bs, s->cluster_size);
                    if (offset < 0) {
                        ret = offset;
                        goto fail;
                    }

                    /* The offset must fit in the offset field */
                    assert((offset & L2E_OFFSET_MASK) == offset);

                    if (l2_refcount > 1) {
                        /* For shared L2 tables, set the refcount accordingly
                         * (it is already 1 and needs to be l2_refcount) */
                        ret = qcow2_update_cluster_refcount(
                                bs, offset >> s->cluster_bits,
                                refcount_diff(1, l2_refcount), false,
                                QCOW2_DISCARD_OTHER);
                        if (ret < 0) {
                            qcow2_free_clusters(bs, offset, s->cluster_size,
                                                QCOW2_DISCARD_OTHER);
                            goto fail;
                        }
                    }
                }

                if (offset_into_cluster(s, offset)) {
                    int l2_index = slice * s->l2_slice_size + j;
                    qcow2_signal_corruption(
                        bs, true, -1, -1,
                        "Cluster allocation offset "
                        "%#" PRIx64 " unaligned (L2 offset: %#"
                        PRIx64 ", L2 index: %#x)", offset,
                        l2_offset, l2_index);
                    if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                        qcow2_free_clusters(bs, offset, s->cluster_size,
                                            QCOW2_DISCARD_ALWAYS);
                    }
                    ret = -EIO;
                    goto fail;
                }

                ret = qcow2_pre_write_overlap_check(bs, 0, offset,
                                                    s->cluster_size, true);
                if (ret < 0) {
                    if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                        qcow2_free_clusters(bs, offset, s->cluster_size,
                                            QCOW2_DISCARD_ALWAYS);
                    }
                    goto fail;
                }

                ret = bdrv_pwrite_zeroes(s->data_file, offset,
                                         s->cluster_size, 0);
                if (ret < 0) {
                    if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                        qcow2_free_clusters(bs, offset, s->cluster_size,
                                            QCOW2_DISCARD_ALWAYS);
                    }
                    goto fail;
                }

                if (l2_refcount == 1) {
                    set_l2_entry(s, l2_slice, j, offset | QCOW_OFLAG_COPIED);
                } else {
                    set_l2_entry(s, l2_slice, j, offset);
                }
                /*
                 * No need to call set_l2_bitmap() after set_l2_entry() because
                 * this function doesn't support images with subclusters.
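                 *
                 * QCOW_OFLAG_COPIED is only set when the refcount is exactly
                 * 1, i.e. the cluster is not shared with any snapshot and may
                 * therefore be rewritten in place without COW.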
                 */
                l2_dirty = true;
            }

            if (is_active_l1) {
                if (l2_dirty) {
                    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
                    qcow2_cache_depends_on_flush(s->l2_table_cache);
                }
                qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
            } else {
                if (l2_dirty) {
                    ret = qcow2_pre_write_overlap_check(
                        bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2,
                        slice_offset, slice_size2, false);
                    if (ret < 0) {
                        goto fail;
                    }

                    ret = bdrv_pwrite(bs->file, slice_offset, slice_size2,
                                      l2_slice, 0);
                    if (ret < 0) {
                        goto fail;
                    }
                }
            }
        }

        (*visited_l1_entries)++;
        if (status_cb) {
            status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
        }
    }

    ret = 0;

fail:
    if (l2_slice) {
        if (!is_active_l1) {
            qemu_vfree(l2_slice);
        } else {
            qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
        }
    }
    return ret;
}

/*
 * For backed images, expands all zero clusters on the image. For non-backed
 * images, deallocates all non-pre-allocated zero clusters (and claims the
 * allocation for pre-allocated ones). This is important for downgrading to a
 * qcow2 version which doesn't yet support metadata zero clusters.
 */
int qcow2_expand_zero_clusters(BlockDriverState *bs,
                               BlockDriverAmendStatusCB *status_cb,
                               void *cb_opaque)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *l1_table = NULL;
    int64_t l1_entries = 0, visited_l1_entries = 0;
    int ret;
    int i, j;

    if (status_cb) {
        l1_entries = s->l1_size;
        for (i = 0; i < s->nb_snapshots; i++) {
            l1_entries += s->snapshots[i].l1_size;
        }
    }

    ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
                                     &visited_l1_entries, l1_entries,
                                     status_cb, cb_opaque);
    if (ret < 0) {
        goto fail;
    }

    /* Inactive L1 tables may point to active L2 tables - therefore it is
     * necessary to flush the L2 table cache before trying to access the L2
     * tables pointed to by inactive L1 entries (else we might try to expand
     * zero clusters that have already been expanded); furthermore, it is also
     * necessary to empty the L2 table cache, since it may contain tables which
     * are now going to be modified directly on disk, bypassing the cache.
     * qcow2_cache_empty() does both for us.
     */
    ret = qcow2_cache_empty(bs, s->l2_table_cache);
    if (ret < 0) {
        goto fail;
    }

    for (i = 0; i < s->nb_snapshots; i++) {
        int l1_size2;
        uint64_t *new_l1_table;
        Error *local_err = NULL;

        ret = qcow2_validate_table(bs, s->snapshots[i].l1_table_offset,
                                   s->snapshots[i].l1_size, L1E_SIZE,
                                   QCOW_MAX_L1_SIZE, "Snapshot L1 table",
                                   &local_err);
        if (ret < 0) {
            error_report_err(local_err);
            goto fail;
        }

        l1_size2 = s->snapshots[i].l1_size * L1E_SIZE;
        new_l1_table = g_try_realloc(l1_table, l1_size2);

        if (!new_l1_table) {
            ret = -ENOMEM;
            goto fail;
        }

        l1_table = new_l1_table;

        ret = bdrv_pread(bs->file, s->snapshots[i].l1_table_offset, l1_size2,
                         l1_table, 0);
        if (ret < 0) {
            goto fail;
        }

        for (j = 0; j < s->snapshots[i].l1_size; j++) {
            be64_to_cpus(&l1_table[j]);
        }

        ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
                                         &visited_l1_entries, l1_entries,
                                         status_cb, cb_opaque);
        if (ret < 0) {
            goto fail;
        }
    }

    ret = 0;

fail:
    g_free(l1_table);
    return ret;
}

void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry,
                                     uint64_t *coffset, int *csize)
{
    BDRVQcow2State *s = bs->opaque;
    int nb_csectors;

    assert(qcow2_get_cluster_type(bs, l2_entry) == QCOW2_CLUSTER_COMPRESSED);

    *coffset = l2_entry & s->cluster_offset_mask;

    nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1;
    *csize = nb_csectors * QCOW2_COMPRESSED_SECTOR_SIZE -
             (*coffset & (QCOW2_COMPRESSED_SECTOR_SIZE - 1));
}
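
/*
 * Note on qcow2_parse_compressed_l2_entry(): the compressed cluster
 * descriptor records its size only at QCOW2_COMPRESSED_SECTOR_SIZE
 * (512-byte) granularity, so the *csize computed above is an upper bound on
 * the length of the compressed data. Worked example: a compressed cluster
 * that starts 100 bytes into its first sector and spans two sectors yields
 * csize = 2 * 512 - 100 = 924 bytes, even if the deflate stream itself ends
 * earlier.
 */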