#!/bin/bash
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# creator
owner=mreitz@redhat.com

# Name of this test, derived from the script path.  "$0" is quoted so that
# a path containing whitespace is not word-split before reaching basename.
seq="$(basename "$0")"
echo "QA output created by $seq"

here="$PWD"
status=1    # failure is the default!

# Exit/signal handler body: delegates to _cleanup_test_img, which is not
# defined in this file (presumably provided by common.rc -- verify there).
_cleanup()
{
    _cleanup_test_img
}
# \$status is escaped so that it is expanded when the trap fires, i.e. the
# script exits with whatever value "status" holds at that time.
trap "_cleanup; exit \$status" 0 1 2 3 15

# get standard environment, filters and checks
. ./common.rc
. ./common.filter

# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file
_supported_os Linux

# Byte offsets of the metadata structures poked at by the tests below.
rt_offset=65536  # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)

IMGOPTS="compat=1.1"

# qemu-io "open" command lines, passed via -c in the tests below.
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"

echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# The corrupt bit should not be set anyway
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# The corrupt bit must now be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# This information should be available through qemu-img info
_img_info --format-specific

# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
                                            | _filter_testdir \
                                            | _filter_imgfmt

# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)

echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Grow the file so that a full 64 KiB region exists at l2_offset, then
# point the first L1 entry at it (this allocates the L2 table)
truncate -s "$((l2_offset + 65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Flag that cluster as in use inside the refcount block
poke_file "$TEST_IMG" "$((rb_offset + 8))" "\x00\x01"
# Make the first L2 entry (a data cluster) point at the refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

# Attempt a repair
_check_test_img -r all

# After the repair the corrupt bit is expected to be cleared
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

# Write once more to see whether the image is really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" \
    | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" \
    | _filter_qemu_io
# The inactive (snapshot) L2 table stays at its old offset; the poked
# entry lives in the L2 table at its post-snapshot location
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
    "\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features
_check_test_img -r all
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

# Verify the data: pattern 4 in the active state, pattern 1 once the
# snapshot has been applied again
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" \
    | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" \
    | _filter_qemu_io

echo
echo "=== Testing overlap while COW is in flight ==="
echo
# compat=0.10 is needed so that the discard below really unallocates the
# sector instead of turning it into a zero sector - COW is what we are
# after here.
IMGOPTS='compat=0.10' _make_test_img 1G
# Write two clusters; the second write forces an L2 table to be created
# after the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" \
    | _filter_qemu_io
# Discard the first cluster.  It will be reallocated shortly and then be
# used for COW.
$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" \
    | _filter_qemu_io
# Now corrupt the image: mark the second L2 table cluster as free.
poke_file "$TEST_IMG" 131084 "\x00\x00" # 0x2000c
# Next step: start a write that needs COW and stop it right before the
# read; then trigger the corruption prevention by writing anything to any
# unallocated cluster, which leads to an attempt to overwrite the second
# L2 table.  Finally, resume the COW write and see it fail (but not crash).
# Feed qemu-io a command script on stdin (one command per line): open the
# image through blkdebug, break at cow_read, issue an AIO write, wait for
# the breakpoint, do a second write, then resume the stopped request.
printf '%s\n' \
    "open -o file.driver=blkdebug $TEST_IMG" \
    'break cow_read 0' \
    'aio_write 0k 1k' \
    'wait_break 0' \
    'write 64k 64k' \
    'resume 0' \
    | $QEMU_IO | _filter_qemu_io

echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# The first write creates L1/L2
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Zero the first entry of the refcount block, then write again
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo
echo "=== Testing unaligned L1 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# The entry gets masked with ~(512 - 1) = ~0x1ff, so it makes no
# difference whether its lower 9 bits are aligned or not
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

# Same corruption again, this time exercising zero cluster expansion
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned L2 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo
echo "=== Testing unaligned pre-allocated zero cluster ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# Amending to compat=0.10 triggers zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned reftable entry ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo
echo "=== Testing non-fatal corruption on freeing ==="
echo
# Body of the "non-fatal corruption on freeing" section: discard a cluster
# whose L2 entry was made unaligned
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo
echo "=== Testing read-only corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Two reads, but only one error message is expected
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" \
    | _filter_qemu_io

echo
echo "=== Testing non-fatal and then fatal corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" \
    | _filter_qemu_io
# Corrupt the first two L2 entries (8 bytes each)
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$((l2_offset + 8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Two error messages are expected here
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo
echo "=== Testing empty refcount table ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The first data cluster is already allocated, so this write triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at()),
# which causes a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" \
    | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Write to the image whose refcount block was just zeroed
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block with compressed write ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# An L2 table already exists thanks to the previous write, so this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing zero refcount table size ==="
echo
_make_test_img 64M
# Zero four bytes at header offset 56
poke_file "$TEST_IMG" "56" "\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 \
    | _filter_testdir \
    | _filter_imgfmt
# Repair the image
_check_test_img -r all

echo
echo "=== Testing incorrect refcount table offset ==="
echo
_make_test_img 64M
# Zero eight bytes at header offset 48
poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo
echo "=== Testing dirty corrupt image ==="
echo

_make_test_img 64M

# Make the refblock look unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Set the dirty flag, which forces an automatic check on open
poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 \
    | _filter_testdir \
    | _filter_imgfmt

echo '--- Repairing ---'

# Because of the dirty bit the actual repair should already have
# happened, but some cleanup (like freeing the old reftable) may have
# failed since the image was already marked corrupt by that point
_check_test_img -r all

echo
echo "=== Writing to an unaligned preallocated zero cluster ==="
echo

_make_test_img 64M

# Allocate the L2 table
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Pretend a preallocated zero cluster lives somewhere inside the image
# header
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# Let's write to it!
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo '--- Repairing ---'
_check_test_img -r all

echo
echo '=== Discarding with an unaligned refblock ==='
echo

_make_test_img 64M

$QEMU_IO -c "write 0 128k" "$TEST_IMG" \
    | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Discard a range that will be submitted as two requests
# (main part + tail)
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"

echo '--- Repairing ---'
# The first repair fails because the corruption keeps the check function
# from double-checking.
# (-q is used for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Discarding an out-of-bounds refblock ==="
echo

_make_test_img 64M

# Pretend there's a refblock really up high (second reftable entry)
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Next, try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
# Shrink attempt for the "out-of-bounds refblock" section
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the covering refblock has a valid index in the
# reftable)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to point to the third refblock
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Same as above
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock
poke_file "$TEST_IMG" "$((rt_offset + 16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Drive QEMU over QMP on stdio: issue a qemu-io write to the corrupted
# image through the human monitor, then quit
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp | _filter_qemu_io

echo
echo "=== Testing incoming inactive corrupted image ==="
echo

_make_test_img 64M
# Create an unaligned L1 entry, so qemu will signal a corruption when
# reading from the covered area
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"

# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive',
                        'driver': 'qcow2',
                        'file': {
                            'driver': 'file',
                            'filename': '$TEST_IMG'
                        }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp | _filter_qemu_io

echo
# Image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'

# success, all done
echo "*** done"
# Quote the expansion so a test name containing whitespace or glob
# characters removes exactly one file; "--" guards against a leading dash.
rm -f -- "$seq.full"
status=0