1 /* 2 * Block driver for the QCOW version 2 format 3 * 4 * Copyright (c) 2004-2006 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 
23 */ 24 25 #include "qemu/osdep.h" 26 #include "qapi/error.h" 27 #include "qemu-common.h" 28 #include "block/block_int.h" 29 #include "qcow2.h" 30 #include "qemu/range.h" 31 #include "qemu/bswap.h" 32 #include "qemu/cutils.h" 33 34 static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size, 35 uint64_t max); 36 static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, 37 int64_t offset, int64_t length, uint64_t addend, 38 bool decrease, enum qcow2_discard_type type); 39 40 static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index); 41 static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index); 42 static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index); 43 static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index); 44 static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index); 45 static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index); 46 static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index); 47 48 static void set_refcount_ro0(void *refcount_array, uint64_t index, 49 uint64_t value); 50 static void set_refcount_ro1(void *refcount_array, uint64_t index, 51 uint64_t value); 52 static void set_refcount_ro2(void *refcount_array, uint64_t index, 53 uint64_t value); 54 static void set_refcount_ro3(void *refcount_array, uint64_t index, 55 uint64_t value); 56 static void set_refcount_ro4(void *refcount_array, uint64_t index, 57 uint64_t value); 58 static void set_refcount_ro5(void *refcount_array, uint64_t index, 59 uint64_t value); 60 static void set_refcount_ro6(void *refcount_array, uint64_t index, 61 uint64_t value); 62 63 64 static Qcow2GetRefcountFunc *const get_refcount_funcs[] = { 65 &get_refcount_ro0, 66 &get_refcount_ro1, 67 &get_refcount_ro2, 68 &get_refcount_ro3, 69 &get_refcount_ro4, 70 &get_refcount_ro5, 71 &get_refcount_ro6 72 }; 73 74 static Qcow2SetRefcountFunc *const set_refcount_funcs[] = { 
75 &set_refcount_ro0, 76 &set_refcount_ro1, 77 &set_refcount_ro2, 78 &set_refcount_ro3, 79 &set_refcount_ro4, 80 &set_refcount_ro5, 81 &set_refcount_ro6 82 }; 83 84 85 /*********************************************************/ 86 /* refcount handling */ 87 88 static void update_max_refcount_table_index(BDRVQcow2State *s) 89 { 90 unsigned i = s->refcount_table_size - 1; 91 while (i > 0 && (s->refcount_table[i] & REFT_OFFSET_MASK) == 0) { 92 i--; 93 } 94 /* Set s->max_refcount_table_index to the index of the last used entry */ 95 s->max_refcount_table_index = i; 96 } 97 98 int qcow2_refcount_init(BlockDriverState *bs) 99 { 100 BDRVQcow2State *s = bs->opaque; 101 unsigned int refcount_table_size2, i; 102 int ret; 103 104 assert(s->refcount_order >= 0 && s->refcount_order <= 6); 105 106 s->get_refcount = get_refcount_funcs[s->refcount_order]; 107 s->set_refcount = set_refcount_funcs[s->refcount_order]; 108 109 assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t)); 110 refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t); 111 s->refcount_table = g_try_malloc(refcount_table_size2); 112 113 if (s->refcount_table_size > 0) { 114 if (s->refcount_table == NULL) { 115 ret = -ENOMEM; 116 goto fail; 117 } 118 BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD); 119 ret = bdrv_pread(bs->file, s->refcount_table_offset, 120 s->refcount_table, refcount_table_size2); 121 if (ret < 0) { 122 goto fail; 123 } 124 for(i = 0; i < s->refcount_table_size; i++) 125 be64_to_cpus(&s->refcount_table[i]); 126 update_max_refcount_table_index(s); 127 } 128 return 0; 129 fail: 130 return ret; 131 } 132 133 void qcow2_refcount_close(BlockDriverState *bs) 134 { 135 BDRVQcow2State *s = bs->opaque; 136 g_free(s->refcount_table); 137 } 138 139 140 static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index) 141 { 142 return (((const uint8_t *)refcount_array)[index / 8] >> (index % 8)) & 0x1; 143 } 144 145 static void set_refcount_ro0(void *refcount_array, uint64_t index, 
146 uint64_t value) 147 { 148 assert(!(value >> 1)); 149 ((uint8_t *)refcount_array)[index / 8] &= ~(0x1 << (index % 8)); 150 ((uint8_t *)refcount_array)[index / 8] |= value << (index % 8); 151 } 152 153 static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index) 154 { 155 return (((const uint8_t *)refcount_array)[index / 4] >> (2 * (index % 4))) 156 & 0x3; 157 } 158 159 static void set_refcount_ro1(void *refcount_array, uint64_t index, 160 uint64_t value) 161 { 162 assert(!(value >> 2)); 163 ((uint8_t *)refcount_array)[index / 4] &= ~(0x3 << (2 * (index % 4))); 164 ((uint8_t *)refcount_array)[index / 4] |= value << (2 * (index % 4)); 165 } 166 167 static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index) 168 { 169 return (((const uint8_t *)refcount_array)[index / 2] >> (4 * (index % 2))) 170 & 0xf; 171 } 172 173 static void set_refcount_ro2(void *refcount_array, uint64_t index, 174 uint64_t value) 175 { 176 assert(!(value >> 4)); 177 ((uint8_t *)refcount_array)[index / 2] &= ~(0xf << (4 * (index % 2))); 178 ((uint8_t *)refcount_array)[index / 2] |= value << (4 * (index % 2)); 179 } 180 181 static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index) 182 { 183 return ((const uint8_t *)refcount_array)[index]; 184 } 185 186 static void set_refcount_ro3(void *refcount_array, uint64_t index, 187 uint64_t value) 188 { 189 assert(!(value >> 8)); 190 ((uint8_t *)refcount_array)[index] = value; 191 } 192 193 static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index) 194 { 195 return be16_to_cpu(((const uint16_t *)refcount_array)[index]); 196 } 197 198 static void set_refcount_ro4(void *refcount_array, uint64_t index, 199 uint64_t value) 200 { 201 assert(!(value >> 16)); 202 ((uint16_t *)refcount_array)[index] = cpu_to_be16(value); 203 } 204 205 static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index) 206 { 207 return be32_to_cpu(((const uint32_t *)refcount_array)[index]); 208 } 209 
210 static void set_refcount_ro5(void *refcount_array, uint64_t index, 211 uint64_t value) 212 { 213 assert(!(value >> 32)); 214 ((uint32_t *)refcount_array)[index] = cpu_to_be32(value); 215 } 216 217 static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index) 218 { 219 return be64_to_cpu(((const uint64_t *)refcount_array)[index]); 220 } 221 222 static void set_refcount_ro6(void *refcount_array, uint64_t index, 223 uint64_t value) 224 { 225 ((uint64_t *)refcount_array)[index] = cpu_to_be64(value); 226 } 227 228 229 static int load_refcount_block(BlockDriverState *bs, 230 int64_t refcount_block_offset, 231 void **refcount_block) 232 { 233 BDRVQcow2State *s = bs->opaque; 234 235 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD); 236 return qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset, 237 refcount_block); 238 } 239 240 /* 241 * Retrieves the refcount of the cluster given by its index and stores it in 242 * *refcount. Returns 0 on success and -errno on failure. 
243 */ 244 int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, 245 uint64_t *refcount) 246 { 247 BDRVQcow2State *s = bs->opaque; 248 uint64_t refcount_table_index, block_index; 249 int64_t refcount_block_offset; 250 int ret; 251 void *refcount_block; 252 253 refcount_table_index = cluster_index >> s->refcount_block_bits; 254 if (refcount_table_index >= s->refcount_table_size) { 255 *refcount = 0; 256 return 0; 257 } 258 refcount_block_offset = 259 s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK; 260 if (!refcount_block_offset) { 261 *refcount = 0; 262 return 0; 263 } 264 265 if (offset_into_cluster(s, refcount_block_offset)) { 266 qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" PRIx64 267 " unaligned (reftable index: %#" PRIx64 ")", 268 refcount_block_offset, refcount_table_index); 269 return -EIO; 270 } 271 272 ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset, 273 &refcount_block); 274 if (ret < 0) { 275 return ret; 276 } 277 278 block_index = cluster_index & (s->refcount_block_size - 1); 279 *refcount = s->get_refcount(refcount_block, block_index); 280 281 qcow2_cache_put(s->refcount_block_cache, &refcount_block); 282 283 return 0; 284 } 285 286 /* Checks if two offsets are described by the same refcount block */ 287 static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a, 288 uint64_t offset_b) 289 { 290 uint64_t block_a = offset_a >> (s->cluster_bits + s->refcount_block_bits); 291 uint64_t block_b = offset_b >> (s->cluster_bits + s->refcount_block_bits); 292 293 return (block_a == block_b); 294 } 295 296 /* 297 * Loads a refcount block. If it doesn't exist yet, it is allocated first 298 * (including growing the refcount table if needed). 
299 * 300 * Returns 0 on success or -errno in error case 301 */ 302 static int alloc_refcount_block(BlockDriverState *bs, 303 int64_t cluster_index, void **refcount_block) 304 { 305 BDRVQcow2State *s = bs->opaque; 306 unsigned int refcount_table_index; 307 int64_t ret; 308 309 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC); 310 311 /* Find the refcount block for the given cluster */ 312 refcount_table_index = cluster_index >> s->refcount_block_bits; 313 314 if (refcount_table_index < s->refcount_table_size) { 315 316 uint64_t refcount_block_offset = 317 s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK; 318 319 /* If it's already there, we're done */ 320 if (refcount_block_offset) { 321 if (offset_into_cluster(s, refcount_block_offset)) { 322 qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" 323 PRIx64 " unaligned (reftable index: " 324 "%#x)", refcount_block_offset, 325 refcount_table_index); 326 return -EIO; 327 } 328 329 return load_refcount_block(bs, refcount_block_offset, 330 refcount_block); 331 } 332 } 333 334 /* 335 * If we came here, we need to allocate something. Something is at least 336 * a cluster for the new refcount block. It may also include a new refcount 337 * table if the old refcount table is too small. 338 * 339 * Note that allocating clusters here needs some special care: 340 * 341 * - We can't use the normal qcow2_alloc_clusters(), it would try to 342 * increase the refcount and very likely we would end up with an endless 343 * recursion. Instead we must place the refcount blocks in a way that 344 * they can describe them themselves. 345 * 346 * - We need to consider that at this point we are inside update_refcounts 347 * and potentially doing an initial refcount increase. This means that 348 * some clusters have already been allocated by the caller, but their 349 * refcount isn't accurate yet. 
If we allocate clusters for metadata, we 350 * need to return -EAGAIN to signal the caller that it needs to restart 351 * the search for free clusters. 352 * 353 * - alloc_clusters_noref and qcow2_free_clusters may load a different 354 * refcount block into the cache 355 */ 356 357 *refcount_block = NULL; 358 359 /* We write to the refcount table, so we might depend on L2 tables */ 360 ret = qcow2_cache_flush(bs, s->l2_table_cache); 361 if (ret < 0) { 362 return ret; 363 } 364 365 /* Allocate the refcount block itself and mark it as used */ 366 int64_t new_block = alloc_clusters_noref(bs, s->cluster_size, INT64_MAX); 367 if (new_block < 0) { 368 return new_block; 369 } 370 371 /* The offset must fit in the offset field of the refcount table entry */ 372 assert((new_block & REFT_OFFSET_MASK) == new_block); 373 374 /* If we're allocating the block at offset 0 then something is wrong */ 375 if (new_block == 0) { 376 qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid " 377 "allocation of refcount block at offset 0"); 378 return -EIO; 379 } 380 381 #ifdef DEBUG_ALLOC2 382 fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64 383 " at %" PRIx64 "\n", 384 refcount_table_index, cluster_index << s->cluster_bits, new_block); 385 #endif 386 387 if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) { 388 /* Zero the new refcount block before updating it */ 389 ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block, 390 refcount_block); 391 if (ret < 0) { 392 goto fail; 393 } 394 395 memset(*refcount_block, 0, s->cluster_size); 396 397 /* The block describes itself, need to update the cache */ 398 int block_index = (new_block >> s->cluster_bits) & 399 (s->refcount_block_size - 1); 400 s->set_refcount(*refcount_block, block_index, 1); 401 } else { 402 /* Described somewhere else. This can recurse at most twice before we 403 * arrive at a block that describes itself. 
*/ 404 ret = update_refcount(bs, new_block, s->cluster_size, 1, false, 405 QCOW2_DISCARD_NEVER); 406 if (ret < 0) { 407 goto fail; 408 } 409 410 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 411 if (ret < 0) { 412 goto fail; 413 } 414 415 /* Initialize the new refcount block only after updating its refcount, 416 * update_refcount uses the refcount cache itself */ 417 ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block, 418 refcount_block); 419 if (ret < 0) { 420 goto fail; 421 } 422 423 memset(*refcount_block, 0, s->cluster_size); 424 } 425 426 /* Now the new refcount block needs to be written to disk */ 427 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE); 428 qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block); 429 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 430 if (ret < 0) { 431 goto fail; 432 } 433 434 /* If the refcount table is big enough, just hook the block up there */ 435 if (refcount_table_index < s->refcount_table_size) { 436 uint64_t data64 = cpu_to_be64(new_block); 437 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP); 438 ret = bdrv_pwrite_sync(bs->file, 439 s->refcount_table_offset + refcount_table_index * sizeof(uint64_t), 440 &data64, sizeof(data64)); 441 if (ret < 0) { 442 goto fail; 443 } 444 445 s->refcount_table[refcount_table_index] = new_block; 446 /* If there's a hole in s->refcount_table then it can happen 447 * that refcount_table_index < s->max_refcount_table_index */ 448 s->max_refcount_table_index = 449 MAX(s->max_refcount_table_index, refcount_table_index); 450 451 /* The new refcount block may be where the caller intended to put its 452 * data, so let it restart the search. */ 453 return -EAGAIN; 454 } 455 456 qcow2_cache_put(s->refcount_block_cache, refcount_block); 457 458 /* 459 * If we come here, we need to grow the refcount table. Again, a new 460 * refcount table needs some space and we can't simply allocate to avoid 461 * endless recursion. 
462 * 463 * Therefore let's grab new refcount blocks at the end of the image, which 464 * will describe themselves and the new refcount table. This way we can 465 * reference them only in the new table and do the switch to the new 466 * refcount table at once without producing an inconsistent state in 467 * between. 468 */ 469 BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW); 470 471 /* Calculate the number of refcount blocks needed so far; this will be the 472 * basis for calculating the index of the first cluster used for the 473 * self-describing refcount structures which we are about to create. 474 * 475 * Because we reached this point, there cannot be any refcount entries for 476 * cluster_index or higher indices yet. However, because new_block has been 477 * allocated to describe that cluster (and it will assume this role later 478 * on), we cannot use that index; also, new_block may actually have a higher 479 * cluster index than cluster_index, so it needs to be taken into account 480 * here (and 1 needs to be added to its value because that cluster is used). 481 */ 482 uint64_t blocks_used = DIV_ROUND_UP(MAX(cluster_index + 1, 483 (new_block >> s->cluster_bits) + 1), 484 s->refcount_block_size); 485 486 /* Create the new refcount table and blocks */ 487 uint64_t meta_offset = (blocks_used * s->refcount_block_size) * 488 s->cluster_size; 489 490 ret = qcow2_refcount_area(bs, meta_offset, 0, false, 491 refcount_table_index, new_block); 492 if (ret < 0) { 493 return ret; 494 } 495 496 ret = load_refcount_block(bs, new_block, refcount_block); 497 if (ret < 0) { 498 return ret; 499 } 500 501 /* If we were trying to do the initial refcount update for some cluster 502 * allocation, we might have used the same clusters to store newly 503 * allocated metadata. Make the caller search some new space. 
*/ 504 return -EAGAIN; 505 506 fail: 507 if (*refcount_block != NULL) { 508 qcow2_cache_put(s->refcount_block_cache, refcount_block); 509 } 510 return ret; 511 } 512 513 /* 514 * Starting at @start_offset, this function creates new self-covering refcount 515 * structures: A new refcount table and refcount blocks which cover all of 516 * themselves, and a number of @additional_clusters beyond their end. 517 * @start_offset must be at the end of the image file, that is, there must be 518 * only empty space beyond it. 519 * If @exact_size is false, the refcount table will have 50 % more entries than 520 * necessary so it will not need to grow again soon. 521 * If @new_refblock_offset is not zero, it contains the offset of a refcount 522 * block that should be entered into the new refcount table at index 523 * @new_refblock_index. 524 * 525 * Returns: The offset after the new refcount structures (i.e. where the 526 * @additional_clusters may be placed) on success, -errno on error. 527 */ 528 int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset, 529 uint64_t additional_clusters, bool exact_size, 530 int new_refblock_index, 531 uint64_t new_refblock_offset) 532 { 533 BDRVQcow2State *s = bs->opaque; 534 uint64_t total_refblock_count_u64, additional_refblock_count; 535 int total_refblock_count, table_size, area_reftable_index, table_clusters; 536 int i; 537 uint64_t table_offset, block_offset, end_offset; 538 int ret; 539 uint64_t *new_table; 540 541 assert(!(start_offset % s->cluster_size)); 542 543 qcow2_refcount_metadata_size(start_offset / s->cluster_size + 544 additional_clusters, 545 s->cluster_size, s->refcount_order, 546 !exact_size, &total_refblock_count_u64); 547 if (total_refblock_count_u64 > QCOW_MAX_REFTABLE_SIZE) { 548 return -EFBIG; 549 } 550 total_refblock_count = total_refblock_count_u64; 551 552 /* Index in the refcount table of the first refcount block to cover the area 553 * of refcount structures we are about to create; we know that 
554 * @total_refblock_count can cover @start_offset, so this will definitely 555 * fit into an int. */ 556 area_reftable_index = (start_offset / s->cluster_size) / 557 s->refcount_block_size; 558 559 if (exact_size) { 560 table_size = total_refblock_count; 561 } else { 562 table_size = total_refblock_count + 563 DIV_ROUND_UP(total_refblock_count, 2); 564 } 565 /* The qcow2 file can only store the reftable size in number of clusters */ 566 table_size = ROUND_UP(table_size, s->cluster_size / sizeof(uint64_t)); 567 table_clusters = (table_size * sizeof(uint64_t)) / s->cluster_size; 568 569 if (table_size > QCOW_MAX_REFTABLE_SIZE) { 570 return -EFBIG; 571 } 572 573 new_table = g_try_new0(uint64_t, table_size); 574 575 assert(table_size > 0); 576 if (new_table == NULL) { 577 ret = -ENOMEM; 578 goto fail; 579 } 580 581 /* Fill the new refcount table */ 582 if (table_size > s->max_refcount_table_index) { 583 /* We're actually growing the reftable */ 584 memcpy(new_table, s->refcount_table, 585 (s->max_refcount_table_index + 1) * sizeof(uint64_t)); 586 } else { 587 /* Improbable case: We're shrinking the reftable. However, the caller 588 * has assured us that there is only empty space beyond @start_offset, 589 * so we can simply drop all of the refblocks that won't fit into the 590 * new reftable. 
*/ 591 memcpy(new_table, s->refcount_table, table_size * sizeof(uint64_t)); 592 } 593 594 if (new_refblock_offset) { 595 assert(new_refblock_index < total_refblock_count); 596 new_table[new_refblock_index] = new_refblock_offset; 597 } 598 599 /* Count how many new refblocks we have to create */ 600 additional_refblock_count = 0; 601 for (i = area_reftable_index; i < total_refblock_count; i++) { 602 if (!new_table[i]) { 603 additional_refblock_count++; 604 } 605 } 606 607 table_offset = start_offset + additional_refblock_count * s->cluster_size; 608 end_offset = table_offset + table_clusters * s->cluster_size; 609 610 /* Fill the refcount blocks, and create new ones, if necessary */ 611 block_offset = start_offset; 612 for (i = area_reftable_index; i < total_refblock_count; i++) { 613 void *refblock_data; 614 uint64_t first_offset_covered; 615 616 /* Reuse an existing refblock if possible, create a new one otherwise */ 617 if (new_table[i]) { 618 ret = qcow2_cache_get(bs, s->refcount_block_cache, new_table[i], 619 &refblock_data); 620 if (ret < 0) { 621 goto fail; 622 } 623 } else { 624 ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, 625 block_offset, &refblock_data); 626 if (ret < 0) { 627 goto fail; 628 } 629 memset(refblock_data, 0, s->cluster_size); 630 qcow2_cache_entry_mark_dirty(s->refcount_block_cache, 631 refblock_data); 632 633 new_table[i] = block_offset; 634 block_offset += s->cluster_size; 635 } 636 637 /* First host offset covered by this refblock */ 638 first_offset_covered = (uint64_t)i * s->refcount_block_size * 639 s->cluster_size; 640 if (first_offset_covered < end_offset) { 641 int j, end_index; 642 643 /* Set the refcount of all of the new refcount structures to 1 */ 644 645 if (first_offset_covered < start_offset) { 646 assert(i == area_reftable_index); 647 j = (start_offset - first_offset_covered) / s->cluster_size; 648 assert(j < s->refcount_block_size); 649 } else { 650 j = 0; 651 } 652 653 end_index = MIN((end_offset - 
first_offset_covered) / 654 s->cluster_size, 655 s->refcount_block_size); 656 657 for (; j < end_index; j++) { 658 /* The caller guaranteed us this space would be empty */ 659 assert(s->get_refcount(refblock_data, j) == 0); 660 s->set_refcount(refblock_data, j, 1); 661 } 662 663 qcow2_cache_entry_mark_dirty(s->refcount_block_cache, 664 refblock_data); 665 } 666 667 qcow2_cache_put(s->refcount_block_cache, &refblock_data); 668 } 669 670 assert(block_offset == table_offset); 671 672 /* Write refcount blocks to disk */ 673 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS); 674 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 675 if (ret < 0) { 676 goto fail; 677 } 678 679 /* Write refcount table to disk */ 680 for (i = 0; i < total_refblock_count; i++) { 681 cpu_to_be64s(&new_table[i]); 682 } 683 684 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE); 685 ret = bdrv_pwrite_sync(bs->file, table_offset, new_table, 686 table_size * sizeof(uint64_t)); 687 if (ret < 0) { 688 goto fail; 689 } 690 691 for (i = 0; i < total_refblock_count; i++) { 692 be64_to_cpus(&new_table[i]); 693 } 694 695 /* Hook up the new refcount table in the qcow2 header */ 696 struct QEMU_PACKED { 697 uint64_t d64; 698 uint32_t d32; 699 } data; 700 data.d64 = cpu_to_be64(table_offset); 701 data.d32 = cpu_to_be32(table_clusters); 702 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE); 703 ret = bdrv_pwrite_sync(bs->file, 704 offsetof(QCowHeader, refcount_table_offset), 705 &data, sizeof(data)); 706 if (ret < 0) { 707 goto fail; 708 } 709 710 /* And switch it in memory */ 711 uint64_t old_table_offset = s->refcount_table_offset; 712 uint64_t old_table_size = s->refcount_table_size; 713 714 g_free(s->refcount_table); 715 s->refcount_table = new_table; 716 s->refcount_table_size = table_size; 717 s->refcount_table_offset = table_offset; 718 update_max_refcount_table_index(s); 719 720 /* Free old table. 
*/ 721 qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t), 722 QCOW2_DISCARD_OTHER); 723 724 return end_offset; 725 726 fail: 727 g_free(new_table); 728 return ret; 729 } 730 731 void qcow2_process_discards(BlockDriverState *bs, int ret) 732 { 733 BDRVQcow2State *s = bs->opaque; 734 Qcow2DiscardRegion *d, *next; 735 736 QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) { 737 QTAILQ_REMOVE(&s->discards, d, next); 738 739 /* Discard is optional, ignore the return value */ 740 if (ret >= 0) { 741 bdrv_pdiscard(bs->file, d->offset, d->bytes); 742 } 743 744 g_free(d); 745 } 746 } 747 748 static void update_refcount_discard(BlockDriverState *bs, 749 uint64_t offset, uint64_t length) 750 { 751 BDRVQcow2State *s = bs->opaque; 752 Qcow2DiscardRegion *d, *p, *next; 753 754 QTAILQ_FOREACH(d, &s->discards, next) { 755 uint64_t new_start = MIN(offset, d->offset); 756 uint64_t new_end = MAX(offset + length, d->offset + d->bytes); 757 758 if (new_end - new_start <= length + d->bytes) { 759 /* There can't be any overlap, areas ending up here have no 760 * references any more and therefore shouldn't get freed another 761 * time. 
*/ 762 assert(d->bytes + length == new_end - new_start); 763 d->offset = new_start; 764 d->bytes = new_end - new_start; 765 goto found; 766 } 767 } 768 769 d = g_malloc(sizeof(*d)); 770 *d = (Qcow2DiscardRegion) { 771 .bs = bs, 772 .offset = offset, 773 .bytes = length, 774 }; 775 QTAILQ_INSERT_TAIL(&s->discards, d, next); 776 777 found: 778 /* Merge discard requests if they are adjacent now */ 779 QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) { 780 if (p == d 781 || p->offset > d->offset + d->bytes 782 || d->offset > p->offset + p->bytes) 783 { 784 continue; 785 } 786 787 /* Still no overlap possible */ 788 assert(p->offset == d->offset + d->bytes 789 || d->offset == p->offset + p->bytes); 790 791 QTAILQ_REMOVE(&s->discards, p, next); 792 d->offset = MIN(d->offset, p->offset); 793 d->bytes += p->bytes; 794 g_free(p); 795 } 796 } 797 798 /* XXX: cache several refcount block clusters ? */ 799 /* @addend is the absolute value of the addend; if @decrease is set, @addend 800 * will be subtracted from the current refcount, otherwise it will be added */ 801 static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, 802 int64_t offset, 803 int64_t length, 804 uint64_t addend, 805 bool decrease, 806 enum qcow2_discard_type type) 807 { 808 BDRVQcow2State *s = bs->opaque; 809 int64_t start, last, cluster_offset; 810 void *refcount_block = NULL; 811 int64_t old_table_index = -1; 812 int ret; 813 814 #ifdef DEBUG_ALLOC2 815 fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 816 " addend=%s%" PRIu64 "\n", offset, length, decrease ? 
"-" : "", 817 addend); 818 #endif 819 if (length < 0) { 820 return -EINVAL; 821 } else if (length == 0) { 822 return 0; 823 } 824 825 if (decrease) { 826 qcow2_cache_set_dependency(bs, s->refcount_block_cache, 827 s->l2_table_cache); 828 } 829 830 start = start_of_cluster(s, offset); 831 last = start_of_cluster(s, offset + length - 1); 832 for(cluster_offset = start; cluster_offset <= last; 833 cluster_offset += s->cluster_size) 834 { 835 int block_index; 836 uint64_t refcount; 837 int64_t cluster_index = cluster_offset >> s->cluster_bits; 838 int64_t table_index = cluster_index >> s->refcount_block_bits; 839 840 /* Load the refcount block and allocate it if needed */ 841 if (table_index != old_table_index) { 842 if (refcount_block) { 843 qcow2_cache_put(s->refcount_block_cache, &refcount_block); 844 } 845 ret = alloc_refcount_block(bs, cluster_index, &refcount_block); 846 /* If the caller needs to restart the search for free clusters, 847 * try the same ones first to see if they're still free. */ 848 if (ret == -EAGAIN) { 849 if (s->free_cluster_index > (start >> s->cluster_bits)) { 850 s->free_cluster_index = (start >> s->cluster_bits); 851 } 852 } 853 if (ret < 0) { 854 goto fail; 855 } 856 } 857 old_table_index = table_index; 858 859 qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block); 860 861 /* we can update the count and save it */ 862 block_index = cluster_index & (s->refcount_block_size - 1); 863 864 refcount = s->get_refcount(refcount_block, block_index); 865 if (decrease ? 
(refcount - addend > refcount) 866 : (refcount + addend < refcount || 867 refcount + addend > s->refcount_max)) 868 { 869 ret = -EINVAL; 870 goto fail; 871 } 872 if (decrease) { 873 refcount -= addend; 874 } else { 875 refcount += addend; 876 } 877 if (refcount == 0 && cluster_index < s->free_cluster_index) { 878 s->free_cluster_index = cluster_index; 879 } 880 s->set_refcount(refcount_block, block_index, refcount); 881 882 if (refcount == 0) { 883 void *table; 884 885 table = qcow2_cache_is_table_offset(s->refcount_block_cache, 886 offset); 887 if (table != NULL) { 888 qcow2_cache_put(s->refcount_block_cache, &refcount_block); 889 qcow2_cache_discard(s->refcount_block_cache, table); 890 } 891 892 table = qcow2_cache_is_table_offset(s->l2_table_cache, offset); 893 if (table != NULL) { 894 qcow2_cache_discard(s->l2_table_cache, table); 895 } 896 897 if (s->discard_passthrough[type]) { 898 update_refcount_discard(bs, cluster_offset, s->cluster_size); 899 } 900 } 901 } 902 903 ret = 0; 904 fail: 905 if (!s->cache_discards) { 906 qcow2_process_discards(bs, ret); 907 } 908 909 /* Write last changed block to disk */ 910 if (refcount_block) { 911 qcow2_cache_put(s->refcount_block_cache, &refcount_block); 912 } 913 914 /* 915 * Try do undo any updates if an error is returned (This may succeed in 916 * some cases like ENOSPC for allocating a new refcount block) 917 */ 918 if (ret < 0) { 919 int dummy; 920 dummy = update_refcount(bs, offset, cluster_offset - offset, addend, 921 !decrease, QCOW2_DISCARD_NEVER); 922 (void)dummy; 923 } 924 925 return ret; 926 } 927 928 /* 929 * Increases or decreases the refcount of a given cluster. 930 * 931 * @addend is the absolute value of the addend; if @decrease is set, @addend 932 * will be subtracted from the current refcount, otherwise it will be added. 933 * 934 * On success 0 is returned; on failure -errno is returned. 
935 */ 936 int qcow2_update_cluster_refcount(BlockDriverState *bs, 937 int64_t cluster_index, 938 uint64_t addend, bool decrease, 939 enum qcow2_discard_type type) 940 { 941 BDRVQcow2State *s = bs->opaque; 942 int ret; 943 944 ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend, 945 decrease, type); 946 if (ret < 0) { 947 return ret; 948 } 949 950 return 0; 951 } 952 953 954 955 /*********************************************************/ 956 /* cluster allocation functions */ 957 958 959 960 /* return < 0 if error */ 961 static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size, 962 uint64_t max) 963 { 964 BDRVQcow2State *s = bs->opaque; 965 uint64_t i, nb_clusters, refcount; 966 int ret; 967 968 /* We can't allocate clusters if they may still be queued for discard. */ 969 if (s->cache_discards) { 970 qcow2_process_discards(bs, 0); 971 } 972 973 nb_clusters = size_to_clusters(s, size); 974 retry: 975 for(i = 0; i < nb_clusters; i++) { 976 uint64_t next_cluster_index = s->free_cluster_index++; 977 ret = qcow2_get_refcount(bs, next_cluster_index, &refcount); 978 979 if (ret < 0) { 980 return ret; 981 } else if (refcount != 0) { 982 goto retry; 983 } 984 } 985 986 /* Make sure that all offsets in the "allocated" range are representable 987 * in the requested max */ 988 if (s->free_cluster_index > 0 && 989 s->free_cluster_index - 1 > (max >> s->cluster_bits)) 990 { 991 return -EFBIG; 992 } 993 994 #ifdef DEBUG_ALLOC2 995 fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n", 996 size, 997 (s->free_cluster_index - nb_clusters) << s->cluster_bits); 998 #endif 999 return (s->free_cluster_index - nb_clusters) << s->cluster_bits; 1000 } 1001 1002 int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size) 1003 { 1004 int64_t offset; 1005 int ret; 1006 1007 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC); 1008 do { 1009 offset = alloc_clusters_noref(bs, size, QCOW_MAX_CLUSTER_OFFSET); 1010 if (offset < 0) { 1011 return offset; 
1012 } 1013 1014 ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER); 1015 } while (ret == -EAGAIN); 1016 1017 if (ret < 0) { 1018 return ret; 1019 } 1020 1021 return offset; 1022 } 1023 1024 int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, 1025 int64_t nb_clusters) 1026 { 1027 BDRVQcow2State *s = bs->opaque; 1028 uint64_t cluster_index, refcount; 1029 uint64_t i; 1030 int ret; 1031 1032 assert(nb_clusters >= 0); 1033 if (nb_clusters == 0) { 1034 return 0; 1035 } 1036 1037 do { 1038 /* Check how many clusters there are free */ 1039 cluster_index = offset >> s->cluster_bits; 1040 for(i = 0; i < nb_clusters; i++) { 1041 ret = qcow2_get_refcount(bs, cluster_index++, &refcount); 1042 if (ret < 0) { 1043 return ret; 1044 } else if (refcount != 0) { 1045 break; 1046 } 1047 } 1048 1049 /* And then allocate them */ 1050 ret = update_refcount(bs, offset, i << s->cluster_bits, 1, false, 1051 QCOW2_DISCARD_NEVER); 1052 } while (ret == -EAGAIN); 1053 1054 if (ret < 0) { 1055 return ret; 1056 } 1057 1058 return i; 1059 } 1060 1061 /* only used to allocate compressed sectors. We try to allocate 1062 contiguous sectors. 
   size must be <= cluster_size.
   Returns the byte offset of the allocated area, or a negative errno value
   on failure. */
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
{
    BDRVQcow2State *s = bs->opaque;
    int64_t offset;
    size_t free_in_cluster;
    int ret;

    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
    assert(size > 0 && size <= s->cluster_size);
    /* free_byte_offset is either unset (0) or points into the middle of a
     * cluster, never at a cluster boundary */
    assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset));

    offset = s->free_byte_offset;

    if (offset) {
        uint64_t refcount;
        ret = qcow2_get_refcount(bs, offset >> s->cluster_bits, &refcount);
        if (ret < 0) {
            return ret;
        }

        /* The partially used cluster cannot take another reference, so it
         * must not be shared any further; start with a fresh cluster */
        if (refcount == s->refcount_max) {
            offset = 0;
        }
    }

    free_in_cluster = s->cluster_size - offset_into_cluster(s, offset);
    do {
        if (!offset || free_in_cluster < size) {
            int64_t new_cluster;

            new_cluster = alloc_clusters_noref(bs, s->cluster_size,
                                               MIN(s->cluster_offset_mask,
                                                   QCOW_MAX_CLUSTER_OFFSET));
            if (new_cluster < 0) {
                return new_cluster;
            }

            if (new_cluster == 0) {
                qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
                                        "allocation of compressed cluster "
                                        "at offset 0");
                return -EIO;
            }

            /* If the new cluster does not directly follow the partially used
             * one, the request is placed at the start of the new cluster;
             * otherwise it may straddle both clusters */
            if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
                offset = new_cluster;
                free_in_cluster = s->cluster_size;
            } else {
                free_in_cluster += s->cluster_size;
            }
        }

        assert(offset);
        ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
        if (ret < 0) {
            /* Forget the position so that a retry allocates a new cluster */
            offset = 0;
        }
    } while (ret == -EAGAIN);
    if (ret < 0) {
        return ret;
    }

    /* The cluster refcount was incremented; refcount blocks must be flushed
     * before the caller's L2 table updates. */
    qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);

    /* Remember where the next sub-cluster allocation may continue; a position
     * exactly on a cluster boundary means there is nothing left to share */
    s->free_byte_offset = offset + size;
    if (!offset_into_cluster(s, s->free_byte_offset)) {
        s->free_byte_offset = 0;
    }

    return offset;
}

/*
 * Drops one reference (addend 1, decrease = true) from the byte range
 * [offset, offset + size) via update_refcount(), using discard policy @type.
 *
 * Failures are only reported on stderr; nothing is returned to the caller.
 */
void qcow2_free_clusters(BlockDriverState *bs,
                          int64_t offset, int64_t size,
                          enum qcow2_discard_type type)
{
    int ret;

    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
    ret = update_refcount(bs, offset, size, 1, true, type);
    if (ret < 0) {
        fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
        /* TODO Remember the clusters to free them later and avoid leaking */
    }
}

/*
 * Free a cluster using its L2 entry (handles clusters of all types, e.g.
 * normal cluster, compressed cluster, etc.)
 *
 * @nb_clusters is only used for normal and preallocated-zero clusters; for a
 * compressed cluster the length is derived from the sector count stored in
 * the L2 entry itself.
 */
void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
                             int nb_clusters, enum qcow2_discard_type type)
{
    BDRVQcow2State *s = bs->opaque;

    switch (qcow2_get_cluster_type(l2_entry)) {
    case QCOW2_CLUSTER_COMPRESSED:
        {
            int nb_csectors;
            nb_csectors = ((l2_entry >> s->csize_shift) &
                           s->csize_mask) + 1;
            /* Free whole 512-byte sectors, starting at the sector that
             * contains the compressed data */
            qcow2_free_clusters(bs,
                (l2_entry & s->cluster_offset_mask) & ~511,
                nb_csectors * 512, type);
        }
        break;
    case QCOW2_CLUSTER_NORMAL:
    case QCOW2_CLUSTER_ZERO_ALLOC:
        if (offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
            /* Non-fatal corruption report; the cluster is left alone */
            qcow2_signal_corruption(bs, false, -1, -1,
                                    "Cannot free unaligned cluster %#llx",
                                    l2_entry & L2E_OFFSET_MASK);
        } else {
            qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
                                nb_clusters << s->cluster_bits, type);
        }
        break;
    case QCOW2_CLUSTER_ZERO_PLAIN:
    case QCOW2_CLUSTER_UNALLOCATED:
        /* Nothing is allocated for these types, so nothing to free */
        break;
    default:
        abort();
    }
}

/*
 * Writes back the L2 table cache and, if qcow2_need_accurate_refcounts()
 * says so, the refcount block cache as well.
 *
 * Returns 0 on success or the negative value returned by the failing
 * qcow2_cache_write() call.
 */
int coroutine_fn qcow2_write_caches(BlockDriverState *bs)
{
    BDRVQcow2State *s = bs->opaque;
    int ret;

    ret = qcow2_cache_write(bs, s->l2_table_cache);
    if (ret < 0) {
        return ret;
    }

    if (qcow2_need_accurate_refcounts(s)) {
        ret = qcow2_cache_write(bs, s->refcount_block_cache);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * Writes back the metadata caches (see qcow2_write_caches()) and then flushes
 * the underlying image file.
 *
 * Returns a negative value if writing the caches failed, otherwise the result
 * of bdrv_flush().
 */
int coroutine_fn qcow2_flush_caches(BlockDriverState *bs)
{
    int ret = qcow2_write_caches(bs);
    if (ret < 0) {
        return ret;
    }

    return bdrv_flush(bs->file->bs);
}

/*********************************************************/
/* snapshots and image creation */



/*
 * update the refcounts of snapshots and the copied flag
 *
 * Walks the L1 table at @l1_table_offset (@l1_size entries) and every L2
 * table it references.  For each allocated cluster and each L2 table the
 * refcount is changed by @addend, which must be -1, 0 or 1 (asserted), and
 * QCOW_OFLAG_COPIED is set in the entry exactly when the resulting refcount
 * is 1.  Compressed clusters never get the flag.
 *
 * If @l1_table_offset is not the active L1 table, the table is read from the
 * image file into a temporary buffer; otherwise s->l1_table is used directly.
 *
 * Returns 0 on success and a negative errno on failure.
 */
int qcow2_update_snapshot_refcount(BlockDriverState *bs,
    int64_t l1_table_offset, int l1_size, int addend)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *l1_table, *l2_slice, l2_offset, entry, l1_size2, refcount;
    bool l1_allocated = false;
    int64_t old_entry, old_l2_offset;
    unsigned slice, slice_size2, n_slices;
    int i, j, l1_modified = 0, nb_csectors;
    int ret;

    assert(addend >= -1 && addend <= 1);

    l2_slice = NULL;
    l1_table = NULL;
    /* The *_size2 variables are sizes in bytes rather than in entries */
    l1_size2 = l1_size * sizeof(uint64_t);
    slice_size2 = s->l2_slice_size * sizeof(uint64_t);
    n_slices = s->cluster_size / slice_size2;

    /* Queue discards while refcounts are being changed; they are processed
     * (or dropped, depending on ret) at the end of this function */
    s->cache_discards = true;

    /* WARNING: qcow2_snapshot_goto relies on this function not using the
     * l1_table_offset when it is the current s->l1_table_offset! Be careful
     * when changing this! */
    if (l1_table_offset != s->l1_table_offset) {
        l1_table = g_try_malloc0(ROUND_UP(l1_size2, 512));
        if (l1_size2 && l1_table == NULL) {
            ret = -ENOMEM;
            goto fail;
        }
        l1_allocated = true;

        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
        if (ret < 0) {
            goto fail;
        }

        /* On-disk entries are big endian; work on host-endian values */
        for (i = 0; i < l1_size; i++) {
            be64_to_cpus(&l1_table[i]);
        }
    } else {
        assert(l1_size == s->l1_size);
        l1_table = s->l1_table;
        l1_allocated = false;
    }

    for (i = 0; i < l1_size; i++) {
        l2_offset = l1_table[i];
        if (l2_offset) {
            old_l2_offset = l2_offset;
            l2_offset &= L1E_OFFSET_MASK;

            if (offset_into_cluster(s, l2_offset)) {
                qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
                                        PRIx64 " unaligned (L1 index: %#x)",
                                        l2_offset, i);
                ret = -EIO;
                goto fail;
            }

            /* The L2 table is processed one cache slice at a time */
            for (slice = 0; slice < n_slices; slice++) {
                ret = qcow2_cache_get(bs, s->l2_table_cache,
                                      l2_offset + slice * slice_size2,
                                      (void **) &l2_slice);
                if (ret < 0) {
                    goto fail;
                }

                for (j = 0; j < s->l2_slice_size; j++) {
                    uint64_t cluster_index;
                    uint64_t offset;

                    entry = be64_to_cpu(l2_slice[j]);
                    old_entry = entry;
                    /* Clear the flag; it is set again below if refcount == 1 */
                    entry &= ~QCOW_OFLAG_COPIED;
                    offset = entry & L2E_OFFSET_MASK;

                    switch (qcow2_get_cluster_type(entry)) {
                    case QCOW2_CLUSTER_COMPRESSED:
                        nb_csectors = ((entry >> s->csize_shift) &
                                       s->csize_mask) + 1;
                        if (addend != 0) {
                            ret = update_refcount(
                                bs, (entry & s->cluster_offset_mask) & ~511,
                                nb_csectors * 512, abs(addend), addend < 0,
                                QCOW2_DISCARD_SNAPSHOT);
                            if (ret < 0) {
                                goto fail;
                            }
                        }
                        /* compressed clusters are never modified */
                        /* (any value other than 1 keeps the flag cleared) */
                        refcount = 2;
                        break;

                    case QCOW2_CLUSTER_NORMAL:
                    case QCOW2_CLUSTER_ZERO_ALLOC:
                        if (offset_into_cluster(s, offset)) {
                            /* Here l2_index means table (not slice) index */
                            int l2_index = slice * s->l2_slice_size + j;
                            qcow2_signal_corruption(
                                bs, true, -1, -1, "Cluster "
                                "allocation offset %#" PRIx64
                                " unaligned (L2 offset: %#"
                                PRIx64 ", L2 index: %#x)",
                                offset, l2_offset, l2_index);
                            ret = -EIO;
                            goto fail;
                        }

                        cluster_index = offset >> s->cluster_bits;
                        assert(cluster_index);
                        if (addend != 0) {
                            ret = qcow2_update_cluster_refcount(
                                bs, cluster_index, abs(addend), addend < 0,
                                QCOW2_DISCARD_SNAPSHOT);
                            if (ret < 0) {
                                goto fail;
                            }
                        }

                        ret = qcow2_get_refcount(bs, cluster_index, &refcount);
                        if (ret < 0) {
                            goto fail;
                        }
                        break;

                    case QCOW2_CLUSTER_ZERO_PLAIN:
                    case QCOW2_CLUSTER_UNALLOCATED:
                        refcount = 0;
                        break;

                    default:
                        abort();
                    }

                    if (refcount == 1) {
                        entry |= QCOW_OFLAG_COPIED;
                    }
                    if (entry != old_entry) {
                        if (addend > 0) {
                            /* The increased refcount has to reach the disk
                             * before the modified L2 entry does */
                            qcow2_cache_set_dependency(bs, s->l2_table_cache,
                                                       s->refcount_block_cache);
                        }
                        l2_slice[j] = cpu_to_be64(entry);
                        qcow2_cache_entry_mark_dirty(s->l2_table_cache,
                                                     l2_slice);
                    }
                }

                qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
            }

            /* Now handle the refcount of the L2 table cluster itself */
            if (addend != 0) {
                ret = qcow2_update_cluster_refcount(bs, l2_offset >>
                                                        s->cluster_bits,
                                                    abs(addend), addend < 0,
                                                    QCOW2_DISCARD_SNAPSHOT);
                if (ret < 0) {
                    goto fail;
                }
            }
            ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
                                     &refcount);
            if (ret < 0) {
                goto fail;
            } else if (refcount == 1) {
                l2_offset |= QCOW_OFLAG_COPIED;
            }
            if (l2_offset != old_l2_offset) {
                l1_table[i] = l2_offset;
                l1_modified = 1;
            }
        }
    }

    ret = bdrv_flush(bs);
fail:
    /* Release a slice that is still held after an error inside the loop */
    if (l2_slice) {
        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
    }

    s->cache_discards = false;
    qcow2_process_discards(bs, ret);

    /* Update L1 only if it isn't deleted anyway (addend = -1) */
    if (ret == 0 && addend >= 0 && l1_modified) {
        /* Convert to big endian for the write, then back again because
         * l1_table may be the live s->l1_table */
        for (i = 0; i < l1_size; i++) {
            cpu_to_be64s(&l1_table[i]);
        }

        ret = bdrv_pwrite_sync(bs->file, l1_table_offset,
                               l1_table, l1_size2);

        for (i = 0; i < l1_size; i++) {
            be64_to_cpus(&l1_table[i]);
        }
    }
    if (l1_allocated)
        g_free(l1_table);
    return ret;
}




/*********************************************************/
/* refcount checking functions */


/*
 * Returns the number of bytes needed to store @entries refcount entries of
 * the image's refcount width, rounded up to a whole byte.
 */
static uint64_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries)
{
    /* This assertion holds because there is no way we can address more than
     * 2^(64 - 9) clusters at once (with cluster size 512 = 2^9, and because
     * offsets have to be representable in bytes); due to every cluster
     * corresponding to one refcount entry, we are well below that limit */
    assert(entries < (UINT64_C(1) << (64 - 9)));

    /* Thanks to the assertion this will not overflow, because
     * s->refcount_order < 7.
     * (note: x << s->refcount_order == x * s->refcount_bits) */
    return DIV_ROUND_UP(entries << s->refcount_order, 8);
}

/**
 * Reallocates *array so that it can hold new_size entries. *size must contain
 * the current number of entries in *array. If the reallocation fails, *array
 * and *size will not be modified and -errno will be returned. If the
 * reallocation is successful, *array will be set to the new buffer, *size
 * will be set to new_size and 0 will be returned. The size of the reallocated
 * refcount array buffer will be aligned to a cluster boundary, and the newly
 * allocated area will be zeroed.
1457 */ 1458 static int realloc_refcount_array(BDRVQcow2State *s, void **array, 1459 int64_t *size, int64_t new_size) 1460 { 1461 int64_t old_byte_size, new_byte_size; 1462 void *new_ptr; 1463 1464 /* Round to clusters so the array can be directly written to disk */ 1465 old_byte_size = size_to_clusters(s, refcount_array_byte_size(s, *size)) 1466 * s->cluster_size; 1467 new_byte_size = size_to_clusters(s, refcount_array_byte_size(s, new_size)) 1468 * s->cluster_size; 1469 1470 if (new_byte_size == old_byte_size) { 1471 *size = new_size; 1472 return 0; 1473 } 1474 1475 assert(new_byte_size > 0); 1476 1477 if (new_byte_size > SIZE_MAX) { 1478 return -ENOMEM; 1479 } 1480 1481 new_ptr = g_try_realloc(*array, new_byte_size); 1482 if (!new_ptr) { 1483 return -ENOMEM; 1484 } 1485 1486 if (new_byte_size > old_byte_size) { 1487 memset((char *)new_ptr + old_byte_size, 0, 1488 new_byte_size - old_byte_size); 1489 } 1490 1491 *array = new_ptr; 1492 *size = new_size; 1493 1494 return 0; 1495 } 1496 1497 /* 1498 * Increases the refcount for a range of clusters in a given refcount table. 1499 * This is used to construct a temporary refcount table out of L1 and L2 tables 1500 * which can be compared to the refcount table saved in the image. 1501 * 1502 * Modifies the number of errors in res. 
1503 */ 1504 int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res, 1505 void **refcount_table, 1506 int64_t *refcount_table_size, 1507 int64_t offset, int64_t size) 1508 { 1509 BDRVQcow2State *s = bs->opaque; 1510 uint64_t start, last, cluster_offset, k, refcount; 1511 int ret; 1512 1513 if (size <= 0) { 1514 return 0; 1515 } 1516 1517 start = start_of_cluster(s, offset); 1518 last = start_of_cluster(s, offset + size - 1); 1519 for(cluster_offset = start; cluster_offset <= last; 1520 cluster_offset += s->cluster_size) { 1521 k = cluster_offset >> s->cluster_bits; 1522 if (k >= *refcount_table_size) { 1523 ret = realloc_refcount_array(s, refcount_table, 1524 refcount_table_size, k + 1); 1525 if (ret < 0) { 1526 res->check_errors++; 1527 return ret; 1528 } 1529 } 1530 1531 refcount = s->get_refcount(*refcount_table, k); 1532 if (refcount == s->refcount_max) { 1533 fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64 1534 "\n", cluster_offset); 1535 fprintf(stderr, "Use qemu-img amend to increase the refcount entry " 1536 "width or qemu-img convert to create a clean copy if the " 1537 "image cannot be opened for writing\n"); 1538 res->corruptions++; 1539 continue; 1540 } 1541 s->set_refcount(*refcount_table, k, refcount + 1); 1542 } 1543 1544 return 0; 1545 } 1546 1547 /* Flags for check_refcounts_l1() and check_refcounts_l2() */ 1548 enum { 1549 CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */ 1550 }; 1551 1552 /* 1553 * Increases the refcount in the given refcount table for the all clusters 1554 * referenced in the L2 table. While doing so, performs some checks on L2 1555 * entries. 1556 * 1557 * Returns the number of errors found by the checks or -errno if an internal 1558 * error occurred. 
 */
static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
                              void **refcount_table,
                              int64_t *refcount_table_size, int64_t l2_offset,
                              int flags, BdrvCheckMode fix)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *l2_table, l2_entry;
    /* Expected offset of the next data cluster if the data were contiguous;
     * 0 until the first allocated cluster has been seen */
    uint64_t next_contiguous_offset = 0;
    int i, l2_size, nb_csectors, ret;

    /* Read L2 table from disk */
    l2_size = s->l2_size * sizeof(uint64_t);
    l2_table = g_malloc(l2_size);

    ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size);
    if (ret < 0) {
        fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
        res->check_errors++;
        goto fail;
    }

    /* Do the actual checks */
    for(i = 0; i < s->l2_size; i++) {
        l2_entry = be64_to_cpu(l2_table[i]);

        switch (qcow2_get_cluster_type(l2_entry)) {
        case QCOW2_CLUSTER_COMPRESSED:
            /* Compressed clusters don't have QCOW_OFLAG_COPIED */
            if (l2_entry & QCOW_OFLAG_COPIED) {
                fprintf(stderr, "ERROR: coffset=0x%" PRIx64 ": "
                    "copied flag must never be set for compressed "
                    "clusters\n", l2_entry & s->cluster_offset_mask);
                /* Only the local copy is corrected, nothing is written back */
                l2_entry &= ~QCOW_OFLAG_COPIED;
                res->corruptions++;
            }

            /* Mark cluster as used */
            nb_csectors = ((l2_entry >> s->csize_shift) &
                           s->csize_mask) + 1;
            l2_entry &= s->cluster_offset_mask;
            /* Account whole 512-byte sectors, starting at the sector that
             * contains the compressed data */
            ret = qcow2_inc_refcounts_imrt(bs, res,
                                           refcount_table, refcount_table_size,
                                           l2_entry & ~511, nb_csectors * 512);
            if (ret < 0) {
                goto fail;
            }

            if (flags & CHECK_FRAG_INFO) {
                res->bfi.allocated_clusters++;
                res->bfi.compressed_clusters++;

                /* Compressed clusters are fragmented by nature.  Since they
                 * take up sub-sector space but we only have sector granularity
                 * I/O we need to re-read the same sectors even for adjacent
                 * compressed clusters.
                 */
                res->bfi.fragmented_clusters++;
            }
            break;

        case QCOW2_CLUSTER_ZERO_ALLOC:
        case QCOW2_CLUSTER_NORMAL:
        {
            uint64_t offset = l2_entry & L2E_OFFSET_MASK;

            if (flags & CHECK_FRAG_INFO) {
                res->bfi.allocated_clusters++;
                /* A cluster that does not directly follow the previous one
                 * counts as fragmented */
                if (next_contiguous_offset &&
                    offset != next_contiguous_offset) {
                    res->bfi.fragmented_clusters++;
                }
                next_contiguous_offset = offset + s->cluster_size;
            }

            /* Correct offsets are cluster aligned */
            if (offset_into_cluster(s, offset)) {
                if (qcow2_get_cluster_type(l2_entry) ==
                    QCOW2_CLUSTER_ZERO_ALLOC)
                {
                    fprintf(stderr, "%s offset=%" PRIx64 ": Preallocated zero "
                            "cluster is not properly aligned; L2 entry "
                            "corrupted.\n",
                            fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR",
                            offset);
                    if (fix & BDRV_FIX_ERRORS) {
                        /* Offset of this single L2 entry in the image file */
                        uint64_t l2e_offset =
                            l2_offset + (uint64_t)i * sizeof(uint64_t);

                        /* Repair by replacing the entry with one that has
                         * only the zero flag set (no offset) */
                        l2_entry = QCOW_OFLAG_ZERO;
                        l2_table[i] = cpu_to_be64(l2_entry);
                        ret = qcow2_pre_write_overlap_check(bs,
                                QCOW2_OL_ACTIVE_L2 | QCOW2_OL_INACTIVE_L2,
                                l2e_offset, sizeof(uint64_t));
                        if (ret < 0) {
                            fprintf(stderr, "ERROR: Overlap check failed\n");
                            res->check_errors++;
                            /* Something is seriously wrong, so abort checking
                             * this L2 table */
                            goto fail;
                        }

                        ret = bdrv_pwrite_sync(bs->file, l2e_offset,
                                               &l2_table[i], sizeof(uint64_t));
                        if (ret < 0) {
                            fprintf(stderr, "ERROR: Failed to overwrite L2 "
                                    "table entry: %s\n", strerror(-ret));
                            res->check_errors++;
                            /* Do not abort, continue checking the rest of this
                             * L2 table's entries */
                        } else {
                            res->corruptions_fixed++;
                            /* Skip marking the cluster as used
                             * (it is unused now) */
                            continue;
                        }
                    } else {
                        res->corruptions++;
                    }
                } else {
                    fprintf(stderr, "ERROR offset=%" PRIx64 ": Data cluster is "
                        "not properly aligned; L2 entry corrupted.\n", offset);
                    res->corruptions++;
                }
            }

            /* Mark cluster as used */
            ret = qcow2_inc_refcounts_imrt(bs, res,
                                           refcount_table, refcount_table_size,
                                           offset, s->cluster_size);
            if (ret < 0) {
                goto fail;
            }
            break;
        }

        case QCOW2_CLUSTER_ZERO_PLAIN:
        case QCOW2_CLUSTER_UNALLOCATED:
            /* No cluster is referenced by these entry types */
            break;

        default:
            abort();
        }
    }

    g_free(l2_table);
    return 0;

fail:
    g_free(l2_table);
    return ret;
}

/*
 * Increases the refcount for the L1 table, its L2 tables and all referenced
 * clusters in the given refcount table. While doing so, performs some checks
 * on L1 and L2 entries.
 *
 * Problems found by the checks are counted in res. Returns 0 on success or
 * -errno if an internal error occurred.
 */
static int check_refcounts_l1(BlockDriverState *bs,
                              BdrvCheckResult *res,
                              void **refcount_table,
                              int64_t *refcount_table_size,
                              int64_t l1_table_offset, int l1_size,
                              int flags, BdrvCheckMode fix)
{
    BDRVQcow2State *s = bs->opaque;
    uint64_t *l1_table = NULL, l2_offset, l1_size2;
    int i, ret;

    /* Size of the L1 table in bytes */
    l1_size2 = l1_size * sizeof(uint64_t);

    /* Mark L1 table as used */
    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size,
                                   l1_table_offset, l1_size2);
    if (ret < 0) {
        goto fail;
    }

    /* Read L1 table entries from disk */
    if (l1_size2 > 0) {
        l1_table = g_try_malloc(l1_size2);
        if (l1_table == NULL) {
            ret = -ENOMEM;
            res->check_errors++;
            goto fail;
        }
        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
        if (ret < 0) {
            fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
            res->check_errors++;
            goto fail;
        }
        /* On-disk entries are big endian */
        for(i = 0;i < l1_size; i++)
            be64_to_cpus(&l1_table[i]);
    }

    /* Do the actual checks */
    for(i = 0; i < l1_size; i++) {
        l2_offset = l1_table[i];
        if (l2_offset) {
            /* Mark L2 table as used */
            l2_offset &= L1E_OFFSET_MASK;
            ret = qcow2_inc_refcounts_imrt(bs, res,
                                           refcount_table, refcount_table_size,
                                           l2_offset, s->cluster_size);
            if (ret < 0) {
                goto fail;
            }

            /* L2 tables are cluster aligned */
            if (offset_into_cluster(s, l2_offset)) {
                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
                    "cluster aligned; L1 entry corrupted\n", l2_offset);
                res->corruptions++;
            }

            /* Process and check L2 entries */
            ret = check_refcounts_l2(bs, res, refcount_table,
                                     refcount_table_size, l2_offset, flags,
                                     fix);
            if (ret < 0) {
                goto fail;
            }
        }
    }
    g_free(l1_table);
    return 0;

fail:
    g_free(l1_table);
    return ret;
}

/*
 * Checks the OFLAG_COPIED flag for all L1 and L2 entries.
 *
 * This function does not print an error message nor does it increment
 * check_errors if qcow2_get_refcount fails (this is because such an error will
 * have been already detected and sufficiently signaled by the calling function
 * (qcow2_check_refcounts) by the time this function is called).
1802 */ 1803 static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, 1804 BdrvCheckMode fix) 1805 { 1806 BDRVQcow2State *s = bs->opaque; 1807 uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size); 1808 int ret; 1809 uint64_t refcount; 1810 int i, j; 1811 bool repair; 1812 1813 if (fix & BDRV_FIX_ERRORS) { 1814 /* Always repair */ 1815 repair = true; 1816 } else if (fix & BDRV_FIX_LEAKS) { 1817 /* Repair only if that seems safe: This function is always 1818 * called after the refcounts have been fixed, so the refcount 1819 * is accurate if that repair was successful */ 1820 repair = !res->check_errors && !res->corruptions && !res->leaks; 1821 } else { 1822 repair = false; 1823 } 1824 1825 for (i = 0; i < s->l1_size; i++) { 1826 uint64_t l1_entry = s->l1_table[i]; 1827 uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK; 1828 bool l2_dirty = false; 1829 1830 if (!l2_offset) { 1831 continue; 1832 } 1833 1834 ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, 1835 &refcount); 1836 if (ret < 0) { 1837 /* don't print message nor increment check_errors */ 1838 continue; 1839 } 1840 if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) { 1841 fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d " 1842 "l1_entry=%" PRIx64 " refcount=%" PRIu64 "\n", 1843 repair ? "Repairing" : "ERROR", i, l1_entry, refcount); 1844 if (repair) { 1845 s->l1_table[i] = refcount == 1 1846 ? 
l1_entry | QCOW_OFLAG_COPIED 1847 : l1_entry & ~QCOW_OFLAG_COPIED; 1848 ret = qcow2_write_l1_entry(bs, i); 1849 if (ret < 0) { 1850 res->check_errors++; 1851 goto fail; 1852 } 1853 res->corruptions_fixed++; 1854 } else { 1855 res->corruptions++; 1856 } 1857 } 1858 1859 ret = bdrv_pread(bs->file, l2_offset, l2_table, 1860 s->l2_size * sizeof(uint64_t)); 1861 if (ret < 0) { 1862 fprintf(stderr, "ERROR: Could not read L2 table: %s\n", 1863 strerror(-ret)); 1864 res->check_errors++; 1865 goto fail; 1866 } 1867 1868 for (j = 0; j < s->l2_size; j++) { 1869 uint64_t l2_entry = be64_to_cpu(l2_table[j]); 1870 uint64_t data_offset = l2_entry & L2E_OFFSET_MASK; 1871 QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry); 1872 1873 if (cluster_type == QCOW2_CLUSTER_NORMAL || 1874 cluster_type == QCOW2_CLUSTER_ZERO_ALLOC) { 1875 ret = qcow2_get_refcount(bs, 1876 data_offset >> s->cluster_bits, 1877 &refcount); 1878 if (ret < 0) { 1879 /* don't print message nor increment check_errors */ 1880 continue; 1881 } 1882 if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) { 1883 fprintf(stderr, "%s OFLAG_COPIED data cluster: " 1884 "l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n", 1885 repair ? "Repairing" : "ERROR", l2_entry, refcount); 1886 if (repair) { 1887 l2_table[j] = cpu_to_be64(refcount == 1 1888 ? 
l2_entry | QCOW_OFLAG_COPIED 1889 : l2_entry & ~QCOW_OFLAG_COPIED); 1890 l2_dirty = true; 1891 res->corruptions_fixed++; 1892 } else { 1893 res->corruptions++; 1894 } 1895 } 1896 } 1897 } 1898 1899 if (l2_dirty) { 1900 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2, 1901 l2_offset, s->cluster_size); 1902 if (ret < 0) { 1903 fprintf(stderr, "ERROR: Could not write L2 table; metadata " 1904 "overlap check failed: %s\n", strerror(-ret)); 1905 res->check_errors++; 1906 goto fail; 1907 } 1908 1909 ret = bdrv_pwrite(bs->file, l2_offset, l2_table, 1910 s->cluster_size); 1911 if (ret < 0) { 1912 fprintf(stderr, "ERROR: Could not write L2 table: %s\n", 1913 strerror(-ret)); 1914 res->check_errors++; 1915 goto fail; 1916 } 1917 } 1918 } 1919 1920 ret = 0; 1921 1922 fail: 1923 qemu_vfree(l2_table); 1924 return ret; 1925 } 1926 1927 /* 1928 * Checks consistency of refblocks and accounts for each refblock in 1929 * *refcount_table. 1930 */ 1931 static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, 1932 BdrvCheckMode fix, bool *rebuild, 1933 void **refcount_table, int64_t *nb_clusters) 1934 { 1935 BDRVQcow2State *s = bs->opaque; 1936 int64_t i, size; 1937 int ret; 1938 1939 for(i = 0; i < s->refcount_table_size; i++) { 1940 uint64_t offset, cluster; 1941 offset = s->refcount_table[i]; 1942 cluster = offset >> s->cluster_bits; 1943 1944 /* Refcount blocks are cluster aligned */ 1945 if (offset_into_cluster(s, offset)) { 1946 fprintf(stderr, "ERROR refcount block %" PRId64 " is not " 1947 "cluster aligned; refcount table entry corrupted\n", i); 1948 res->corruptions++; 1949 *rebuild = true; 1950 continue; 1951 } 1952 1953 if (cluster >= *nb_clusters) { 1954 fprintf(stderr, "%s refcount block %" PRId64 " is outside image\n", 1955 fix & BDRV_FIX_ERRORS ? 
"Repairing" : "ERROR", i); 1956 1957 if (fix & BDRV_FIX_ERRORS) { 1958 int64_t new_nb_clusters; 1959 Error *local_err = NULL; 1960 1961 if (offset > INT64_MAX - s->cluster_size) { 1962 ret = -EINVAL; 1963 goto resize_fail; 1964 } 1965 1966 ret = bdrv_truncate(bs->file, offset + s->cluster_size, 1967 PREALLOC_MODE_OFF, &local_err); 1968 if (ret < 0) { 1969 error_report_err(local_err); 1970 goto resize_fail; 1971 } 1972 size = bdrv_getlength(bs->file->bs); 1973 if (size < 0) { 1974 ret = size; 1975 goto resize_fail; 1976 } 1977 1978 new_nb_clusters = size_to_clusters(s, size); 1979 assert(new_nb_clusters >= *nb_clusters); 1980 1981 ret = realloc_refcount_array(s, refcount_table, 1982 nb_clusters, new_nb_clusters); 1983 if (ret < 0) { 1984 res->check_errors++; 1985 return ret; 1986 } 1987 1988 if (cluster >= *nb_clusters) { 1989 ret = -EINVAL; 1990 goto resize_fail; 1991 } 1992 1993 res->corruptions_fixed++; 1994 ret = qcow2_inc_refcounts_imrt(bs, res, 1995 refcount_table, nb_clusters, 1996 offset, s->cluster_size); 1997 if (ret < 0) { 1998 return ret; 1999 } 2000 /* No need to check whether the refcount is now greater than 1: 2001 * This area was just allocated and zeroed, so it can only be 2002 * exactly 1 after qcow2_inc_refcounts_imrt() */ 2003 continue; 2004 2005 resize_fail: 2006 res->corruptions++; 2007 *rebuild = true; 2008 fprintf(stderr, "ERROR could not resize image: %s\n", 2009 strerror(-ret)); 2010 } else { 2011 res->corruptions++; 2012 } 2013 continue; 2014 } 2015 2016 if (offset != 0) { 2017 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, 2018 offset, s->cluster_size); 2019 if (ret < 0) { 2020 return ret; 2021 } 2022 if (s->get_refcount(*refcount_table, cluster) != 1) { 2023 fprintf(stderr, "ERROR refcount block %" PRId64 2024 " refcount=%" PRIu64 "\n", i, 2025 s->get_refcount(*refcount_table, cluster)); 2026 res->corruptions++; 2027 *rebuild = true; 2028 } 2029 } 2030 } 2031 2032 return 0; 2033 } 2034 2035 /* 2036 * Calculates an 
in-memory refcount table. 2037 */ 2038 static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res, 2039 BdrvCheckMode fix, bool *rebuild, 2040 void **refcount_table, int64_t *nb_clusters) 2041 { 2042 BDRVQcow2State *s = bs->opaque; 2043 int64_t i; 2044 QCowSnapshot *sn; 2045 int ret; 2046 2047 if (!*refcount_table) { 2048 int64_t old_size = 0; 2049 ret = realloc_refcount_array(s, refcount_table, 2050 &old_size, *nb_clusters); 2051 if (ret < 0) { 2052 res->check_errors++; 2053 return ret; 2054 } 2055 } 2056 2057 /* header */ 2058 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, 2059 0, s->cluster_size); 2060 if (ret < 0) { 2061 return ret; 2062 } 2063 2064 /* current L1 table */ 2065 ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters, 2066 s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO, 2067 fix); 2068 if (ret < 0) { 2069 return ret; 2070 } 2071 2072 /* snapshots */ 2073 for (i = 0; i < s->nb_snapshots; i++) { 2074 sn = s->snapshots + i; 2075 if (offset_into_cluster(s, sn->l1_table_offset)) { 2076 fprintf(stderr, "ERROR snapshot %s (%s) l1_offset=%#" PRIx64 ": " 2077 "L1 table is not cluster aligned; snapshot table entry " 2078 "corrupted\n", sn->id_str, sn->name, sn->l1_table_offset); 2079 res->corruptions++; 2080 continue; 2081 } 2082 if (sn->l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) { 2083 fprintf(stderr, "ERROR snapshot %s (%s) l1_size=%#" PRIx32 ": " 2084 "L1 table is too large; snapshot table entry corrupted\n", 2085 sn->id_str, sn->name, sn->l1_size); 2086 res->corruptions++; 2087 continue; 2088 } 2089 ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters, 2090 sn->l1_table_offset, sn->l1_size, 0, fix); 2091 if (ret < 0) { 2092 return ret; 2093 } 2094 } 2095 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, 2096 s->snapshots_offset, s->snapshots_size); 2097 if (ret < 0) { 2098 return ret; 2099 } 2100 2101 /* refcount data */ 2102 ret = qcow2_inc_refcounts_imrt(bs, res, 
refcount_table, nb_clusters, 2103 s->refcount_table_offset, 2104 s->refcount_table_size * sizeof(uint64_t)); 2105 if (ret < 0) { 2106 return ret; 2107 } 2108 2109 /* encryption */ 2110 if (s->crypto_header.length) { 2111 ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters, 2112 s->crypto_header.offset, 2113 s->crypto_header.length); 2114 if (ret < 0) { 2115 return ret; 2116 } 2117 } 2118 2119 /* bitmaps */ 2120 ret = qcow2_check_bitmaps_refcounts(bs, res, refcount_table, nb_clusters); 2121 if (ret < 0) { 2122 return ret; 2123 } 2124 2125 return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_clusters); 2126 } 2127 2128 /* 2129 * Compares the actual reference count for each cluster in the image against the 2130 * refcount as reported by the refcount structures on-disk. 2131 */ 2132 static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res, 2133 BdrvCheckMode fix, bool *rebuild, 2134 int64_t *highest_cluster, 2135 void *refcount_table, int64_t nb_clusters) 2136 { 2137 BDRVQcow2State *s = bs->opaque; 2138 int64_t i; 2139 uint64_t refcount1, refcount2; 2140 int ret; 2141 2142 for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) { 2143 ret = qcow2_get_refcount(bs, i, &refcount1); 2144 if (ret < 0) { 2145 fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n", 2146 i, strerror(-ret)); 2147 res->check_errors++; 2148 continue; 2149 } 2150 2151 refcount2 = s->get_refcount(refcount_table, i); 2152 2153 if (refcount1 > 0 || refcount2 > 0) { 2154 *highest_cluster = i; 2155 } 2156 2157 if (refcount1 != refcount2) { 2158 /* Check if we're allowed to fix the mismatch */ 2159 int *num_fixed = NULL; 2160 if (refcount1 == 0) { 2161 *rebuild = true; 2162 } else if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) { 2163 num_fixed = &res->leaks_fixed; 2164 } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) { 2165 num_fixed = &res->corruptions_fixed; 2166 } 2167 2168 fprintf(stderr, "%s cluster %" PRId64 " refcount=%" 
PRIu64 2169 " reference=%" PRIu64 "\n", 2170 num_fixed != NULL ? "Repairing" : 2171 refcount1 < refcount2 ? "ERROR" : 2172 "Leaked", 2173 i, refcount1, refcount2); 2174 2175 if (num_fixed) { 2176 ret = update_refcount(bs, i << s->cluster_bits, 1, 2177 refcount_diff(refcount1, refcount2), 2178 refcount1 > refcount2, 2179 QCOW2_DISCARD_ALWAYS); 2180 if (ret >= 0) { 2181 (*num_fixed)++; 2182 continue; 2183 } 2184 } 2185 2186 /* And if we couldn't, print an error */ 2187 if (refcount1 < refcount2) { 2188 res->corruptions++; 2189 } else { 2190 res->leaks++; 2191 } 2192 } 2193 } 2194 } 2195 2196 /* 2197 * Allocates clusters using an in-memory refcount table (IMRT) in contrast to 2198 * the on-disk refcount structures. 2199 * 2200 * On input, *first_free_cluster tells where to start looking, and need not 2201 * actually be a free cluster; the returned offset will not be before that 2202 * cluster. On output, *first_free_cluster points to the first gap found, even 2203 * if that gap was too small to be used as the returned offset. 2204 * 2205 * Note that *first_free_cluster is a cluster index whereas the return value is 2206 * an offset. 
2207 */ 2208 static int64_t alloc_clusters_imrt(BlockDriverState *bs, 2209 int cluster_count, 2210 void **refcount_table, 2211 int64_t *imrt_nb_clusters, 2212 int64_t *first_free_cluster) 2213 { 2214 BDRVQcow2State *s = bs->opaque; 2215 int64_t cluster = *first_free_cluster, i; 2216 bool first_gap = true; 2217 int contiguous_free_clusters; 2218 int ret; 2219 2220 /* Starting at *first_free_cluster, find a range of at least cluster_count 2221 * continuously free clusters */ 2222 for (contiguous_free_clusters = 0; 2223 cluster < *imrt_nb_clusters && 2224 contiguous_free_clusters < cluster_count; 2225 cluster++) 2226 { 2227 if (!s->get_refcount(*refcount_table, cluster)) { 2228 contiguous_free_clusters++; 2229 if (first_gap) { 2230 /* If this is the first free cluster found, update 2231 * *first_free_cluster accordingly */ 2232 *first_free_cluster = cluster; 2233 first_gap = false; 2234 } 2235 } else if (contiguous_free_clusters) { 2236 contiguous_free_clusters = 0; 2237 } 2238 } 2239 2240 /* If contiguous_free_clusters is greater than zero, it contains the number 2241 * of continuously free clusters until the current cluster; the first free 2242 * cluster in the current "gap" is therefore 2243 * cluster - contiguous_free_clusters */ 2244 2245 /* If no such range could be found, grow the in-memory refcount table 2246 * accordingly to append free clusters at the end of the image */ 2247 if (contiguous_free_clusters < cluster_count) { 2248 /* contiguous_free_clusters clusters are already empty at the image end; 2249 * we need cluster_count clusters; therefore, we have to allocate 2250 * cluster_count - contiguous_free_clusters new clusters at the end of 2251 * the image (which is the current value of cluster; note that cluster 2252 * may exceed old_imrt_nb_clusters if *first_free_cluster pointed beyond 2253 * the image end) */ 2254 ret = realloc_refcount_array(s, refcount_table, imrt_nb_clusters, 2255 cluster + cluster_count 2256 - contiguous_free_clusters); 2257 if 
(ret < 0) { 2258 return ret; 2259 } 2260 } 2261 2262 /* Go back to the first free cluster */ 2263 cluster -= contiguous_free_clusters; 2264 for (i = 0; i < cluster_count; i++) { 2265 s->set_refcount(*refcount_table, cluster + i, 1); 2266 } 2267 2268 return cluster << s->cluster_bits; 2269 } 2270 2271 /* 2272 * Creates a new refcount structure based solely on the in-memory information 2273 * given through *refcount_table. All necessary allocations will be reflected 2274 * in that array. 2275 * 2276 * On success, the old refcount structure is leaked (it will be covered by the 2277 * new refcount structure). 2278 */ 2279 static int rebuild_refcount_structure(BlockDriverState *bs, 2280 BdrvCheckResult *res, 2281 void **refcount_table, 2282 int64_t *nb_clusters) 2283 { 2284 BDRVQcow2State *s = bs->opaque; 2285 int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0; 2286 int64_t refblock_offset, refblock_start, refblock_index; 2287 uint32_t reftable_size = 0; 2288 uint64_t *on_disk_reftable = NULL; 2289 void *on_disk_refblock; 2290 int ret = 0; 2291 struct { 2292 uint64_t reftable_offset; 2293 uint32_t reftable_clusters; 2294 } QEMU_PACKED reftable_offset_and_clusters; 2295 2296 qcow2_cache_empty(bs, s->refcount_block_cache); 2297 2298 write_refblocks: 2299 for (; cluster < *nb_clusters; cluster++) { 2300 if (!s->get_refcount(*refcount_table, cluster)) { 2301 continue; 2302 } 2303 2304 refblock_index = cluster >> s->refcount_block_bits; 2305 refblock_start = refblock_index << s->refcount_block_bits; 2306 2307 /* Don't allocate a cluster in a refblock already written to disk */ 2308 if (first_free_cluster < refblock_start) { 2309 first_free_cluster = refblock_start; 2310 } 2311 refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, 2312 nb_clusters, &first_free_cluster); 2313 if (refblock_offset < 0) { 2314 fprintf(stderr, "ERROR allocating refblock: %s\n", 2315 strerror(-refblock_offset)); 2316 res->check_errors++; 2317 ret = refblock_offset; 2318 goto 
fail; 2319 } 2320 2321 if (reftable_size <= refblock_index) { 2322 uint32_t old_reftable_size = reftable_size; 2323 uint64_t *new_on_disk_reftable; 2324 2325 reftable_size = ROUND_UP((refblock_index + 1) * sizeof(uint64_t), 2326 s->cluster_size) / sizeof(uint64_t); 2327 new_on_disk_reftable = g_try_realloc(on_disk_reftable, 2328 reftable_size * 2329 sizeof(uint64_t)); 2330 if (!new_on_disk_reftable) { 2331 res->check_errors++; 2332 ret = -ENOMEM; 2333 goto fail; 2334 } 2335 on_disk_reftable = new_on_disk_reftable; 2336 2337 memset(on_disk_reftable + old_reftable_size, 0, 2338 (reftable_size - old_reftable_size) * sizeof(uint64_t)); 2339 2340 /* The offset we have for the reftable is now no longer valid; 2341 * this will leak that range, but we can easily fix that by running 2342 * a leak-fixing check after this rebuild operation */ 2343 reftable_offset = -1; 2344 } else { 2345 assert(on_disk_reftable); 2346 } 2347 on_disk_reftable[refblock_index] = refblock_offset; 2348 2349 /* If this is apparently the last refblock (for now), try to squeeze the 2350 * reftable in */ 2351 if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits && 2352 reftable_offset < 0) 2353 { 2354 uint64_t reftable_clusters = size_to_clusters(s, reftable_size * 2355 sizeof(uint64_t)); 2356 reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, 2357 refcount_table, nb_clusters, 2358 &first_free_cluster); 2359 if (reftable_offset < 0) { 2360 fprintf(stderr, "ERROR allocating reftable: %s\n", 2361 strerror(-reftable_offset)); 2362 res->check_errors++; 2363 ret = reftable_offset; 2364 goto fail; 2365 } 2366 } 2367 2368 ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, 2369 s->cluster_size); 2370 if (ret < 0) { 2371 fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); 2372 goto fail; 2373 } 2374 2375 /* The size of *refcount_table is always cluster-aligned, therefore the 2376 * write operation will not overflow */ 2377 on_disk_refblock = (void *)((char *) 
*refcount_table + 2378 refblock_index * s->cluster_size); 2379 2380 ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE, 2381 on_disk_refblock, s->cluster_sectors); 2382 if (ret < 0) { 2383 fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); 2384 goto fail; 2385 } 2386 2387 /* Go to the end of this refblock */ 2388 cluster = refblock_start + s->refcount_block_size - 1; 2389 } 2390 2391 if (reftable_offset < 0) { 2392 uint64_t post_refblock_start, reftable_clusters; 2393 2394 post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size); 2395 reftable_clusters = size_to_clusters(s, 2396 reftable_size * sizeof(uint64_t)); 2397 /* Not pretty but simple */ 2398 if (first_free_cluster < post_refblock_start) { 2399 first_free_cluster = post_refblock_start; 2400 } 2401 reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, 2402 refcount_table, nb_clusters, 2403 &first_free_cluster); 2404 if (reftable_offset < 0) { 2405 fprintf(stderr, "ERROR allocating reftable: %s\n", 2406 strerror(-reftable_offset)); 2407 res->check_errors++; 2408 ret = reftable_offset; 2409 goto fail; 2410 } 2411 2412 goto write_refblocks; 2413 } 2414 2415 for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { 2416 cpu_to_be64s(&on_disk_reftable[refblock_index]); 2417 } 2418 2419 ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, 2420 reftable_size * sizeof(uint64_t)); 2421 if (ret < 0) { 2422 fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); 2423 goto fail; 2424 } 2425 2426 assert(reftable_size < INT_MAX / sizeof(uint64_t)); 2427 ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, 2428 reftable_size * sizeof(uint64_t)); 2429 if (ret < 0) { 2430 fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); 2431 goto fail; 2432 } 2433 2434 /* Enter new reftable into the image header */ 2435 reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset); 2436 
reftable_offset_and_clusters.reftable_clusters = 2437 cpu_to_be32(size_to_clusters(s, reftable_size * sizeof(uint64_t))); 2438 ret = bdrv_pwrite_sync(bs->file, 2439 offsetof(QCowHeader, refcount_table_offset), 2440 &reftable_offset_and_clusters, 2441 sizeof(reftable_offset_and_clusters)); 2442 if (ret < 0) { 2443 fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret)); 2444 goto fail; 2445 } 2446 2447 for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { 2448 be64_to_cpus(&on_disk_reftable[refblock_index]); 2449 } 2450 s->refcount_table = on_disk_reftable; 2451 s->refcount_table_offset = reftable_offset; 2452 s->refcount_table_size = reftable_size; 2453 update_max_refcount_table_index(s); 2454 2455 return 0; 2456 2457 fail: 2458 g_free(on_disk_reftable); 2459 return ret; 2460 } 2461 2462 /* 2463 * Checks an image for refcount consistency. 2464 * 2465 * Returns 0 if no errors are found, the number of errors in case the image is 2466 * detected as corrupted, and -errno when an internal error occurred. 
2467 */ 2468 int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, 2469 BdrvCheckMode fix) 2470 { 2471 BDRVQcow2State *s = bs->opaque; 2472 BdrvCheckResult pre_compare_res; 2473 int64_t size, highest_cluster, nb_clusters; 2474 void *refcount_table = NULL; 2475 bool rebuild = false; 2476 int ret; 2477 2478 size = bdrv_getlength(bs->file->bs); 2479 if (size < 0) { 2480 res->check_errors++; 2481 return size; 2482 } 2483 2484 nb_clusters = size_to_clusters(s, size); 2485 if (nb_clusters > INT_MAX) { 2486 res->check_errors++; 2487 return -EFBIG; 2488 } 2489 2490 res->bfi.total_clusters = 2491 size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE); 2492 2493 ret = calculate_refcounts(bs, res, fix, &rebuild, &refcount_table, 2494 &nb_clusters); 2495 if (ret < 0) { 2496 goto fail; 2497 } 2498 2499 /* In case we don't need to rebuild the refcount structure (but want to fix 2500 * something), this function is immediately called again, in which case the 2501 * result should be ignored */ 2502 pre_compare_res = *res; 2503 compare_refcounts(bs, res, 0, &rebuild, &highest_cluster, refcount_table, 2504 nb_clusters); 2505 2506 if (rebuild && (fix & BDRV_FIX_ERRORS)) { 2507 BdrvCheckResult old_res = *res; 2508 int fresh_leaks = 0; 2509 2510 fprintf(stderr, "Rebuilding refcount structure\n"); 2511 ret = rebuild_refcount_structure(bs, res, &refcount_table, 2512 &nb_clusters); 2513 if (ret < 0) { 2514 goto fail; 2515 } 2516 2517 res->corruptions = 0; 2518 res->leaks = 0; 2519 2520 /* Because the old reftable has been exchanged for a new one the 2521 * references have to be recalculated */ 2522 rebuild = false; 2523 memset(refcount_table, 0, refcount_array_byte_size(s, nb_clusters)); 2524 ret = calculate_refcounts(bs, res, 0, &rebuild, &refcount_table, 2525 &nb_clusters); 2526 if (ret < 0) { 2527 goto fail; 2528 } 2529 2530 if (fix & BDRV_FIX_LEAKS) { 2531 /* The old refcount structures are now leaked, fix it; the result 2532 * can be ignored, aside from leaks which 
were introduced by 2533 * rebuild_refcount_structure() that could not be fixed */ 2534 BdrvCheckResult saved_res = *res; 2535 *res = (BdrvCheckResult){ 0 }; 2536 2537 compare_refcounts(bs, res, BDRV_FIX_LEAKS, &rebuild, 2538 &highest_cluster, refcount_table, nb_clusters); 2539 if (rebuild) { 2540 fprintf(stderr, "ERROR rebuilt refcount structure is still " 2541 "broken\n"); 2542 } 2543 2544 /* Any leaks accounted for here were introduced by 2545 * rebuild_refcount_structure() because that function has created a 2546 * new refcount structure from scratch */ 2547 fresh_leaks = res->leaks; 2548 *res = saved_res; 2549 } 2550 2551 if (res->corruptions < old_res.corruptions) { 2552 res->corruptions_fixed += old_res.corruptions - res->corruptions; 2553 } 2554 if (res->leaks < old_res.leaks) { 2555 res->leaks_fixed += old_res.leaks - res->leaks; 2556 } 2557 res->leaks += fresh_leaks; 2558 } else if (fix) { 2559 if (rebuild) { 2560 fprintf(stderr, "ERROR need to rebuild refcount structures\n"); 2561 res->check_errors++; 2562 ret = -EIO; 2563 goto fail; 2564 } 2565 2566 if (res->leaks || res->corruptions) { 2567 *res = pre_compare_res; 2568 compare_refcounts(bs, res, fix, &rebuild, &highest_cluster, 2569 refcount_table, nb_clusters); 2570 } 2571 } 2572 2573 /* check OFLAG_COPIED */ 2574 ret = check_oflag_copied(bs, res, fix); 2575 if (ret < 0) { 2576 goto fail; 2577 } 2578 2579 res->image_end_offset = (highest_cluster + 1) * s->cluster_size; 2580 ret = 0; 2581 2582 fail: 2583 g_free(refcount_table); 2584 2585 return ret; 2586 } 2587 2588 #define overlaps_with(ofs, sz) \ 2589 ranges_overlap(offset, size, ofs, sz) 2590 2591 /* 2592 * Checks if the given offset into the image file is actually free to use by 2593 * looking for overlaps with important metadata sections (L1/L2 tables etc.), 2594 * i.e. a sanity check without relying on the refcount tables. 
2595 * 2596 * The ign parameter specifies what checks not to perform (being a bitmask of 2597 * QCow2MetadataOverlap values), i.e., what sections to ignore. 2598 * 2599 * Returns: 2600 * - 0 if writing to this offset will not affect the mentioned metadata 2601 * - a positive QCow2MetadataOverlap value indicating one overlapping section 2602 * - a negative value (-errno) indicating an error while performing a check, 2603 * e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2 2604 */ 2605 int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, 2606 int64_t size) 2607 { 2608 BDRVQcow2State *s = bs->opaque; 2609 int chk = s->overlap_check & ~ign; 2610 int i, j; 2611 2612 if (!size) { 2613 return 0; 2614 } 2615 2616 if (chk & QCOW2_OL_MAIN_HEADER) { 2617 if (offset < s->cluster_size) { 2618 return QCOW2_OL_MAIN_HEADER; 2619 } 2620 } 2621 2622 /* align range to test to cluster boundaries */ 2623 size = ROUND_UP(offset_into_cluster(s, offset) + size, s->cluster_size); 2624 offset = start_of_cluster(s, offset); 2625 2626 if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) { 2627 if (overlaps_with(s->l1_table_offset, s->l1_size * sizeof(uint64_t))) { 2628 return QCOW2_OL_ACTIVE_L1; 2629 } 2630 } 2631 2632 if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) { 2633 if (overlaps_with(s->refcount_table_offset, 2634 s->refcount_table_size * sizeof(uint64_t))) { 2635 return QCOW2_OL_REFCOUNT_TABLE; 2636 } 2637 } 2638 2639 if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) { 2640 if (overlaps_with(s->snapshots_offset, s->snapshots_size)) { 2641 return QCOW2_OL_SNAPSHOT_TABLE; 2642 } 2643 } 2644 2645 if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) { 2646 for (i = 0; i < s->nb_snapshots; i++) { 2647 if (s->snapshots[i].l1_size && 2648 overlaps_with(s->snapshots[i].l1_table_offset, 2649 s->snapshots[i].l1_size * sizeof(uint64_t))) { 2650 return QCOW2_OL_INACTIVE_L1; 2651 } 2652 } 2653 } 2654 2655 if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) { 
2656 for (i = 0; i < s->l1_size; i++) { 2657 if ((s->l1_table[i] & L1E_OFFSET_MASK) && 2658 overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK, 2659 s->cluster_size)) { 2660 return QCOW2_OL_ACTIVE_L2; 2661 } 2662 } 2663 } 2664 2665 if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) { 2666 unsigned last_entry = s->max_refcount_table_index; 2667 assert(last_entry < s->refcount_table_size); 2668 assert(last_entry + 1 == s->refcount_table_size || 2669 (s->refcount_table[last_entry + 1] & REFT_OFFSET_MASK) == 0); 2670 for (i = 0; i <= last_entry; i++) { 2671 if ((s->refcount_table[i] & REFT_OFFSET_MASK) && 2672 overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK, 2673 s->cluster_size)) { 2674 return QCOW2_OL_REFCOUNT_BLOCK; 2675 } 2676 } 2677 } 2678 2679 if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) { 2680 for (i = 0; i < s->nb_snapshots; i++) { 2681 uint64_t l1_ofs = s->snapshots[i].l1_table_offset; 2682 uint32_t l1_sz = s->snapshots[i].l1_size; 2683 uint64_t l1_sz2 = l1_sz * sizeof(uint64_t); 2684 uint64_t *l1; 2685 int ret; 2686 2687 ret = qcow2_validate_table(bs, l1_ofs, l1_sz, sizeof(uint64_t), 2688 QCOW_MAX_L1_SIZE, "", NULL); 2689 if (ret < 0) { 2690 return ret; 2691 } 2692 2693 l1 = g_try_malloc(l1_sz2); 2694 2695 if (l1_sz2 && l1 == NULL) { 2696 return -ENOMEM; 2697 } 2698 2699 ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2); 2700 if (ret < 0) { 2701 g_free(l1); 2702 return ret; 2703 } 2704 2705 for (j = 0; j < l1_sz; j++) { 2706 uint64_t l2_ofs = be64_to_cpu(l1[j]) & L1E_OFFSET_MASK; 2707 if (l2_ofs && overlaps_with(l2_ofs, s->cluster_size)) { 2708 g_free(l1); 2709 return QCOW2_OL_INACTIVE_L2; 2710 } 2711 } 2712 2713 g_free(l1); 2714 } 2715 } 2716 2717 if ((chk & QCOW2_OL_BITMAP_DIRECTORY) && 2718 (s->autoclear_features & QCOW2_AUTOCLEAR_BITMAPS)) 2719 { 2720 if (overlaps_with(s->bitmap_directory_offset, 2721 s->bitmap_directory_size)) 2722 { 2723 return QCOW2_OL_BITMAP_DIRECTORY; 2724 } 2725 } 2726 2727 return 0; 2728 } 2729 2730 static const char 
*metadata_ol_names[] = { 2731 [QCOW2_OL_MAIN_HEADER_BITNR] = "qcow2_header", 2732 [QCOW2_OL_ACTIVE_L1_BITNR] = "active L1 table", 2733 [QCOW2_OL_ACTIVE_L2_BITNR] = "active L2 table", 2734 [QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table", 2735 [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block", 2736 [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table", 2737 [QCOW2_OL_INACTIVE_L1_BITNR] = "inactive L1 table", 2738 [QCOW2_OL_INACTIVE_L2_BITNR] = "inactive L2 table", 2739 [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = "bitmap directory", 2740 }; 2741 QEMU_BUILD_BUG_ON(QCOW2_OL_MAX_BITNR != ARRAY_SIZE(metadata_ol_names)); 2742 2743 /* 2744 * First performs a check for metadata overlaps (through 2745 * qcow2_check_metadata_overlap); if that fails with a negative value (error 2746 * while performing a check), that value is returned. If an impending overlap 2747 * is detected, the BDS will be made unusable, the qcow2 file marked corrupt 2748 * and -EIO returned. 2749 * 2750 * Returns 0 if there were neither overlaps nor errors while checking for 2751 * overlaps; or a negative value (-errno) on error. 2752 */ 2753 int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, 2754 int64_t size) 2755 { 2756 int ret = qcow2_check_metadata_overlap(bs, ign, offset, size); 2757 2758 if (ret < 0) { 2759 return ret; 2760 } else if (ret > 0) { 2761 int metadata_ol_bitnr = ctz32(ret); 2762 assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR); 2763 2764 qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid " 2765 "write on metadata (overlaps with %s)", 2766 metadata_ol_names[metadata_ol_bitnr]); 2767 return -EIO; 2768 } 2769 2770 return 0; 2771 } 2772 2773 /* A pointer to a function of this type is given to walk_over_reftable(). That 2774 * function will create refblocks and pass them to a RefblockFinishOp once they 2775 * are completed (@refblock). @refblock_empty is set if the refblock is 2776 * completely empty. 
2777 * 2778 * Along with the refblock, a corresponding reftable entry is passed, in the 2779 * reftable @reftable (which may be reallocated) at @reftable_index. 2780 * 2781 * @allocated should be set to true if a new cluster has been allocated. 2782 */ 2783 typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable, 2784 uint64_t reftable_index, uint64_t *reftable_size, 2785 void *refblock, bool refblock_empty, 2786 bool *allocated, Error **errp); 2787 2788 /** 2789 * This "operation" for walk_over_reftable() allocates the refblock on disk (if 2790 * it is not empty) and inserts its offset into the new reftable. The size of 2791 * this new reftable is increased as required. 2792 */ 2793 static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable, 2794 uint64_t reftable_index, uint64_t *reftable_size, 2795 void *refblock, bool refblock_empty, bool *allocated, 2796 Error **errp) 2797 { 2798 BDRVQcow2State *s = bs->opaque; 2799 int64_t offset; 2800 2801 if (!refblock_empty && reftable_index >= *reftable_size) { 2802 uint64_t *new_reftable; 2803 uint64_t new_reftable_size; 2804 2805 new_reftable_size = ROUND_UP(reftable_index + 1, 2806 s->cluster_size / sizeof(uint64_t)); 2807 if (new_reftable_size > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) { 2808 error_setg(errp, 2809 "This operation would make the refcount table grow " 2810 "beyond the maximum size supported by QEMU, aborting"); 2811 return -ENOTSUP; 2812 } 2813 2814 new_reftable = g_try_realloc(*reftable, new_reftable_size * 2815 sizeof(uint64_t)); 2816 if (!new_reftable) { 2817 error_setg(errp, "Failed to increase reftable buffer size"); 2818 return -ENOMEM; 2819 } 2820 2821 memset(new_reftable + *reftable_size, 0, 2822 (new_reftable_size - *reftable_size) * sizeof(uint64_t)); 2823 2824 *reftable = new_reftable; 2825 *reftable_size = new_reftable_size; 2826 } 2827 2828 if (!refblock_empty && !(*reftable)[reftable_index]) { 2829 offset = qcow2_alloc_clusters(bs, s->cluster_size); 2830 if 
(offset < 0) { 2831 error_setg_errno(errp, -offset, "Failed to allocate refblock"); 2832 return offset; 2833 } 2834 (*reftable)[reftable_index] = offset; 2835 *allocated = true; 2836 } 2837 2838 return 0; 2839 } 2840 2841 /** 2842 * This "operation" for walk_over_reftable() writes the refblock to disk at the 2843 * offset specified by the new reftable's entry. It does not modify the new 2844 * reftable or change any refcounts. 2845 */ 2846 static int flush_refblock(BlockDriverState *bs, uint64_t **reftable, 2847 uint64_t reftable_index, uint64_t *reftable_size, 2848 void *refblock, bool refblock_empty, bool *allocated, 2849 Error **errp) 2850 { 2851 BDRVQcow2State *s = bs->opaque; 2852 int64_t offset; 2853 int ret; 2854 2855 if (reftable_index < *reftable_size && (*reftable)[reftable_index]) { 2856 offset = (*reftable)[reftable_index]; 2857 2858 ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size); 2859 if (ret < 0) { 2860 error_setg_errno(errp, -ret, "Overlap check failed"); 2861 return ret; 2862 } 2863 2864 ret = bdrv_pwrite(bs->file, offset, refblock, s->cluster_size); 2865 if (ret < 0) { 2866 error_setg_errno(errp, -ret, "Failed to write refblock"); 2867 return ret; 2868 } 2869 } else { 2870 assert(refblock_empty); 2871 } 2872 2873 return 0; 2874 } 2875 2876 /** 2877 * This function walks over the existing reftable and every referenced refblock; 2878 * if @new_set_refcount is non-NULL, it is called for every refcount entry to 2879 * create an equal new entry in the passed @new_refblock. Once that 2880 * @new_refblock is completely filled, @operation will be called. 2881 * 2882 * @status_cb and @cb_opaque are used for the amend operation's status callback. 2883 * @index is the index of the walk_over_reftable() calls and @total is the total 2884 * number of walk_over_reftable() calls per amend operation. Both are used for 2885 * calculating the parameters for the status callback. 
2886 * 2887 * @allocated is set to true if a new cluster has been allocated. 2888 */ 2889 static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable, 2890 uint64_t *new_reftable_index, 2891 uint64_t *new_reftable_size, 2892 void *new_refblock, int new_refblock_size, 2893 int new_refcount_bits, 2894 RefblockFinishOp *operation, bool *allocated, 2895 Qcow2SetRefcountFunc *new_set_refcount, 2896 BlockDriverAmendStatusCB *status_cb, 2897 void *cb_opaque, int index, int total, 2898 Error **errp) 2899 { 2900 BDRVQcow2State *s = bs->opaque; 2901 uint64_t reftable_index; 2902 bool new_refblock_empty = true; 2903 int refblock_index; 2904 int new_refblock_index = 0; 2905 int ret; 2906 2907 for (reftable_index = 0; reftable_index < s->refcount_table_size; 2908 reftable_index++) 2909 { 2910 uint64_t refblock_offset = s->refcount_table[reftable_index] 2911 & REFT_OFFSET_MASK; 2912 2913 status_cb(bs, (uint64_t)index * s->refcount_table_size + reftable_index, 2914 (uint64_t)total * s->refcount_table_size, cb_opaque); 2915 2916 if (refblock_offset) { 2917 void *refblock; 2918 2919 if (offset_into_cluster(s, refblock_offset)) { 2920 qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" 2921 PRIx64 " unaligned (reftable index: %#" 2922 PRIx64 ")", refblock_offset, 2923 reftable_index); 2924 error_setg(errp, 2925 "Image is corrupt (unaligned refblock offset)"); 2926 return -EIO; 2927 } 2928 2929 ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offset, 2930 &refblock); 2931 if (ret < 0) { 2932 error_setg_errno(errp, -ret, "Failed to retrieve refblock"); 2933 return ret; 2934 } 2935 2936 for (refblock_index = 0; refblock_index < s->refcount_block_size; 2937 refblock_index++) 2938 { 2939 uint64_t refcount; 2940 2941 if (new_refblock_index >= new_refblock_size) { 2942 /* new_refblock is now complete */ 2943 ret = operation(bs, new_reftable, *new_reftable_index, 2944 new_reftable_size, new_refblock, 2945 new_refblock_empty, allocated, errp); 2946 if 
(ret < 0) { 2947 qcow2_cache_put(s->refcount_block_cache, &refblock); 2948 return ret; 2949 } 2950 2951 (*new_reftable_index)++; 2952 new_refblock_index = 0; 2953 new_refblock_empty = true; 2954 } 2955 2956 refcount = s->get_refcount(refblock, refblock_index); 2957 if (new_refcount_bits < 64 && refcount >> new_refcount_bits) { 2958 uint64_t offset; 2959 2960 qcow2_cache_put(s->refcount_block_cache, &refblock); 2961 2962 offset = ((reftable_index << s->refcount_block_bits) 2963 + refblock_index) << s->cluster_bits; 2964 2965 error_setg(errp, "Cannot decrease refcount entry width to " 2966 "%i bits: Cluster at offset %#" PRIx64 " has a " 2967 "refcount of %" PRIu64, new_refcount_bits, 2968 offset, refcount); 2969 return -EINVAL; 2970 } 2971 2972 if (new_set_refcount) { 2973 new_set_refcount(new_refblock, new_refblock_index++, 2974 refcount); 2975 } else { 2976 new_refblock_index++; 2977 } 2978 new_refblock_empty = new_refblock_empty && refcount == 0; 2979 } 2980 2981 qcow2_cache_put(s->refcount_block_cache, &refblock); 2982 } else { 2983 /* No refblock means every refcount is 0 */ 2984 for (refblock_index = 0; refblock_index < s->refcount_block_size; 2985 refblock_index++) 2986 { 2987 if (new_refblock_index >= new_refblock_size) { 2988 /* new_refblock is now complete */ 2989 ret = operation(bs, new_reftable, *new_reftable_index, 2990 new_reftable_size, new_refblock, 2991 new_refblock_empty, allocated, errp); 2992 if (ret < 0) { 2993 return ret; 2994 } 2995 2996 (*new_reftable_index)++; 2997 new_refblock_index = 0; 2998 new_refblock_empty = true; 2999 } 3000 3001 if (new_set_refcount) { 3002 new_set_refcount(new_refblock, new_refblock_index++, 0); 3003 } else { 3004 new_refblock_index++; 3005 } 3006 } 3007 } 3008 } 3009 3010 if (new_refblock_index > 0) { 3011 /* Complete the potentially existing partially filled final refblock */ 3012 if (new_set_refcount) { 3013 for (; new_refblock_index < new_refblock_size; 3014 new_refblock_index++) 3015 { 3016 
new_set_refcount(new_refblock, new_refblock_index, 0); 3017 } 3018 } 3019 3020 ret = operation(bs, new_reftable, *new_reftable_index, 3021 new_reftable_size, new_refblock, new_refblock_empty, 3022 allocated, errp); 3023 if (ret < 0) { 3024 return ret; 3025 } 3026 3027 (*new_reftable_index)++; 3028 } 3029 3030 status_cb(bs, (uint64_t)(index + 1) * s->refcount_table_size, 3031 (uint64_t)total * s->refcount_table_size, cb_opaque); 3032 3033 return 0; 3034 } 3035 3036 int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, 3037 BlockDriverAmendStatusCB *status_cb, 3038 void *cb_opaque, Error **errp) 3039 { 3040 BDRVQcow2State *s = bs->opaque; 3041 Qcow2GetRefcountFunc *new_get_refcount; 3042 Qcow2SetRefcountFunc *new_set_refcount; 3043 void *new_refblock = qemu_blockalign(bs->file->bs, s->cluster_size); 3044 uint64_t *new_reftable = NULL, new_reftable_size = 0; 3045 uint64_t *old_reftable, old_reftable_size, old_reftable_offset; 3046 uint64_t new_reftable_index = 0; 3047 uint64_t i; 3048 int64_t new_reftable_offset = 0, allocated_reftable_size = 0; 3049 int new_refblock_size, new_refcount_bits = 1 << refcount_order; 3050 int old_refcount_order; 3051 int walk_index = 0; 3052 int ret; 3053 bool new_allocation; 3054 3055 assert(s->qcow_version >= 3); 3056 assert(refcount_order >= 0 && refcount_order <= 6); 3057 3058 /* see qcow2_open() */ 3059 new_refblock_size = 1 << (s->cluster_bits - (refcount_order - 3)); 3060 3061 new_get_refcount = get_refcount_funcs[refcount_order]; 3062 new_set_refcount = set_refcount_funcs[refcount_order]; 3063 3064 3065 do { 3066 int total_walks; 3067 3068 new_allocation = false; 3069 3070 /* At least we have to do this walk and the one which writes the 3071 * refblocks; also, at least we have to do this loop here at least 3072 * twice (normally), first to do the allocations, and second to 3073 * determine that everything is correctly allocated, this then makes 3074 * three walks in total */ 3075 total_walks = MAX(walk_index + 
2, 3); 3076 3077 /* First, allocate the structures so they are present in the refcount 3078 * structures */ 3079 ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index, 3080 &new_reftable_size, NULL, new_refblock_size, 3081 new_refcount_bits, &alloc_refblock, 3082 &new_allocation, NULL, status_cb, cb_opaque, 3083 walk_index++, total_walks, errp); 3084 if (ret < 0) { 3085 goto done; 3086 } 3087 3088 new_reftable_index = 0; 3089 3090 if (new_allocation) { 3091 if (new_reftable_offset) { 3092 qcow2_free_clusters(bs, new_reftable_offset, 3093 allocated_reftable_size * sizeof(uint64_t), 3094 QCOW2_DISCARD_NEVER); 3095 } 3096 3097 new_reftable_offset = qcow2_alloc_clusters(bs, new_reftable_size * 3098 sizeof(uint64_t)); 3099 if (new_reftable_offset < 0) { 3100 error_setg_errno(errp, -new_reftable_offset, 3101 "Failed to allocate the new reftable"); 3102 ret = new_reftable_offset; 3103 goto done; 3104 } 3105 allocated_reftable_size = new_reftable_size; 3106 } 3107 } while (new_allocation); 3108 3109 /* Second, write the new refblocks */ 3110 ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index, 3111 &new_reftable_size, new_refblock, 3112 new_refblock_size, new_refcount_bits, 3113 &flush_refblock, &new_allocation, new_set_refcount, 3114 status_cb, cb_opaque, walk_index, walk_index + 1, 3115 errp); 3116 if (ret < 0) { 3117 goto done; 3118 } 3119 assert(!new_allocation); 3120 3121 3122 /* Write the new reftable */ 3123 ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset, 3124 new_reftable_size * sizeof(uint64_t)); 3125 if (ret < 0) { 3126 error_setg_errno(errp, -ret, "Overlap check failed"); 3127 goto done; 3128 } 3129 3130 for (i = 0; i < new_reftable_size; i++) { 3131 cpu_to_be64s(&new_reftable[i]); 3132 } 3133 3134 ret = bdrv_pwrite(bs->file, new_reftable_offset, new_reftable, 3135 new_reftable_size * sizeof(uint64_t)); 3136 3137 for (i = 0; i < new_reftable_size; i++) { 3138 be64_to_cpus(&new_reftable[i]); 3139 } 3140 3141 if (ret < 0) { 
3142 error_setg_errno(errp, -ret, "Failed to write the new reftable"); 3143 goto done; 3144 } 3145 3146 3147 /* Empty the refcount cache */ 3148 ret = qcow2_cache_flush(bs, s->refcount_block_cache); 3149 if (ret < 0) { 3150 error_setg_errno(errp, -ret, "Failed to flush the refblock cache"); 3151 goto done; 3152 } 3153 3154 /* Update the image header to point to the new reftable; this only updates 3155 * the fields which are relevant to qcow2_update_header(); other fields 3156 * such as s->refcount_table or s->refcount_bits stay stale for now 3157 * (because we have to restore everything if qcow2_update_header() fails) */ 3158 old_refcount_order = s->refcount_order; 3159 old_reftable_size = s->refcount_table_size; 3160 old_reftable_offset = s->refcount_table_offset; 3161 3162 s->refcount_order = refcount_order; 3163 s->refcount_table_size = new_reftable_size; 3164 s->refcount_table_offset = new_reftable_offset; 3165 3166 ret = qcow2_update_header(bs); 3167 if (ret < 0) { 3168 s->refcount_order = old_refcount_order; 3169 s->refcount_table_size = old_reftable_size; 3170 s->refcount_table_offset = old_reftable_offset; 3171 error_setg_errno(errp, -ret, "Failed to update the qcow2 header"); 3172 goto done; 3173 } 3174 3175 /* Now update the rest of the in-memory information */ 3176 old_reftable = s->refcount_table; 3177 s->refcount_table = new_reftable; 3178 update_max_refcount_table_index(s); 3179 3180 s->refcount_bits = 1 << refcount_order; 3181 s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); 3182 s->refcount_max += s->refcount_max - 1; 3183 3184 s->refcount_block_bits = s->cluster_bits - (refcount_order - 3); 3185 s->refcount_block_size = 1 << s->refcount_block_bits; 3186 3187 s->get_refcount = new_get_refcount; 3188 s->set_refcount = new_set_refcount; 3189 3190 /* For cleaning up all old refblocks and the old reftable below the "done" 3191 * label */ 3192 new_reftable = old_reftable; 3193 new_reftable_size = old_reftable_size; 3194 new_reftable_offset = 
old_reftable_offset; 3195 3196 done: 3197 if (new_reftable) { 3198 /* On success, new_reftable actually points to the old reftable (and 3199 * new_reftable_size is the old reftable's size); but that is just 3200 * fine */ 3201 for (i = 0; i < new_reftable_size; i++) { 3202 uint64_t offset = new_reftable[i] & REFT_OFFSET_MASK; 3203 if (offset) { 3204 qcow2_free_clusters(bs, offset, s->cluster_size, 3205 QCOW2_DISCARD_OTHER); 3206 } 3207 } 3208 g_free(new_reftable); 3209 3210 if (new_reftable_offset > 0) { 3211 qcow2_free_clusters(bs, new_reftable_offset, 3212 new_reftable_size * sizeof(uint64_t), 3213 QCOW2_DISCARD_OTHER); 3214 } 3215 } 3216 3217 qemu_vfree(new_refblock); 3218 return ret; 3219 } 3220 3221 static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset) 3222 { 3223 BDRVQcow2State *s = bs->opaque; 3224 uint32_t index = offset_to_reftable_index(s, offset); 3225 int64_t covering_refblock_offset = 0; 3226 3227 if (index < s->refcount_table_size) { 3228 covering_refblock_offset = s->refcount_table[index] & REFT_OFFSET_MASK; 3229 } 3230 if (!covering_refblock_offset) { 3231 qcow2_signal_corruption(bs, true, -1, -1, "Refblock at %#" PRIx64 " is " 3232 "not covered by the refcount structures", 3233 offset); 3234 return -EIO; 3235 } 3236 3237 return covering_refblock_offset; 3238 } 3239 3240 static int qcow2_discard_refcount_block(BlockDriverState *bs, 3241 uint64_t discard_block_offs) 3242 { 3243 BDRVQcow2State *s = bs->opaque; 3244 int64_t refblock_offs; 3245 uint64_t cluster_index = discard_block_offs >> s->cluster_bits; 3246 uint32_t block_index = cluster_index & (s->refcount_block_size - 1); 3247 void *refblock; 3248 int ret; 3249 3250 refblock_offs = get_refblock_offset(bs, discard_block_offs); 3251 if (refblock_offs < 0) { 3252 return refblock_offs; 3253 } 3254 3255 assert(discard_block_offs != 0); 3256 3257 ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs, 3258 &refblock); 3259 if (ret < 0) { 3260 return ret; 3261 } 3262 3263 
if (s->get_refcount(refblock, block_index) != 1) { 3264 qcow2_signal_corruption(bs, true, -1, -1, "Invalid refcount:" 3265 " refblock offset %#" PRIx64 3266 ", reftable index %u" 3267 ", block offset %#" PRIx64 3268 ", refcount %#" PRIx64, 3269 refblock_offs, 3270 offset_to_reftable_index(s, discard_block_offs), 3271 discard_block_offs, 3272 s->get_refcount(refblock, block_index)); 3273 qcow2_cache_put(s->refcount_block_cache, &refblock); 3274 return -EINVAL; 3275 } 3276 s->set_refcount(refblock, block_index, 0); 3277 3278 qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock); 3279 3280 qcow2_cache_put(s->refcount_block_cache, &refblock); 3281 3282 if (cluster_index < s->free_cluster_index) { 3283 s->free_cluster_index = cluster_index; 3284 } 3285 3286 refblock = qcow2_cache_is_table_offset(s->refcount_block_cache, 3287 discard_block_offs); 3288 if (refblock) { 3289 /* discard refblock from the cache if refblock is cached */ 3290 qcow2_cache_discard(s->refcount_block_cache, refblock); 3291 } 3292 update_refcount_discard(bs, discard_block_offs, s->cluster_size); 3293 3294 return 0; 3295 } 3296 3297 int qcow2_shrink_reftable(BlockDriverState *bs) 3298 { 3299 BDRVQcow2State *s = bs->opaque; 3300 uint64_t *reftable_tmp = 3301 g_malloc(s->refcount_table_size * sizeof(uint64_t)); 3302 int i, ret; 3303 3304 for (i = 0; i < s->refcount_table_size; i++) { 3305 int64_t refblock_offs = s->refcount_table[i] & REFT_OFFSET_MASK; 3306 void *refblock; 3307 bool unused_block; 3308 3309 if (refblock_offs == 0) { 3310 reftable_tmp[i] = 0; 3311 continue; 3312 } 3313 ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs, 3314 &refblock); 3315 if (ret < 0) { 3316 goto out; 3317 } 3318 3319 /* the refblock has own reference */ 3320 if (i == offset_to_reftable_index(s, refblock_offs)) { 3321 uint64_t block_index = (refblock_offs >> s->cluster_bits) & 3322 (s->refcount_block_size - 1); 3323 uint64_t refcount = s->get_refcount(refblock, block_index); 3324 3325 
s->set_refcount(refblock, block_index, 0); 3326 3327 unused_block = buffer_is_zero(refblock, s->cluster_size); 3328 3329 s->set_refcount(refblock, block_index, refcount); 3330 } else { 3331 unused_block = buffer_is_zero(refblock, s->cluster_size); 3332 } 3333 qcow2_cache_put(s->refcount_block_cache, &refblock); 3334 3335 reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]); 3336 } 3337 3338 ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset, reftable_tmp, 3339 s->refcount_table_size * sizeof(uint64_t)); 3340 /* 3341 * If the write in the reftable failed the image may contain a partially 3342 * overwritten reftable. In this case it would be better to clear the 3343 * reftable in memory to avoid possible image corruption. 3344 */ 3345 for (i = 0; i < s->refcount_table_size; i++) { 3346 if (s->refcount_table[i] && !reftable_tmp[i]) { 3347 if (ret == 0) { 3348 ret = qcow2_discard_refcount_block(bs, s->refcount_table[i] & 3349 REFT_OFFSET_MASK); 3350 } 3351 s->refcount_table[i] = 0; 3352 } 3353 } 3354 3355 if (!s->cache_discards) { 3356 qcow2_process_discards(bs, ret); 3357 } 3358 3359 out: 3360 g_free(reftable_tmp); 3361 return ret; 3362 } 3363 3364 int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size) 3365 { 3366 BDRVQcow2State *s = bs->opaque; 3367 int64_t i; 3368 3369 for (i = size_to_clusters(s, size) - 1; i >= 0; i--) { 3370 uint64_t refcount; 3371 int ret = qcow2_get_refcount(bs, i, &refcount); 3372 if (ret < 0) { 3373 fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n", 3374 i, strerror(-ret)); 3375 return ret; 3376 } 3377 if (refcount > 0) { 3378 return i; 3379 } 3380 } 3381 qcow2_signal_corruption(bs, true, -1, -1, 3382 "There are no references in the refcount table."); 3383 return -EIO; 3384 } 3385