#!/bin/bash
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# creator
owner=mreitz@redhat.com

# Name of this test, derived from the script's own file name.  "$0" is
# quoted so that a path containing whitespace or glob characters is
# handed to basename as a single argument.
seq="$(basename "$0")"
echo "QA output created by $seq"

here="$PWD"
status=1 # failure is the default!

# Cleanup hook run by the trap below; delegates to _cleanup_test_img,
# which is not defined in this script (it comes from the sourced
# helper files).
_cleanup()
{
    _cleanup_test_img
}
# On shell exit (0) and on signals 1, 2, 3 and 15: clean up, then exit
# with whatever $status holds at that moment (the \$ defers expansion
# until the trap actually fires).
trap "_cleanup; exit \$status" 0 1 2 3 15

# Filter: copies stdin to stdout, dropping every line that contains
# "Input/output error".
# That error line may be printed before or after an event at random, so
# it is masked out for the specific test whose output would otherwise
# be non-deterministic.
_filter_io_error()
{
    sed '/Input\/output error/d'
}

# get standard environment, filters and checks
. ./common.rc
. ./common.filter

# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file
_supported_os Linux

# Byte offsets of the metadata structures inside a freshly created
# image.  These are hard-coded assumptions about the image layout, not
# values read from the image.
rt_offset=65536 # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)

IMGOPTS="compat=1.1"

# qemu-io "open" commands, passed via -c before the actual I/O command.
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"

echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself: the entry written
# at $l1_offset points to 0x30000, which is $l1_offset.
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# The corrupt bit should not be set anyway
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# The corrupt bit must now be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# This information should be available through qemu-img info
_img_info --format-specific

# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
                                            | _filter_testdir \
                                            | _filter_imgfmt

# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)
echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Allocate L2 table: grow the file so that it extends 65536 bytes past
# $l2_offset, then point the first L1 entry at 0x40000 (= $l2_offset).
truncate -s "$(($l2_offset+65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used (writes the two bytes 0x0001 at $rb_offset + 8)
poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01"
# Redirect new data cluster onto refcount block: the first L2 entry now
# points to 0x20000 (= $rb_offset).
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
# Print incompatible_features before and after a write that hits the
# overlapping cluster.
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to fix it
_check_test_img -r all

# The corrupt bit should be cleared
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Check that it is really fixed: the same write is issued again and the
# feature bits are printed once more.
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M
# Write pattern 1, take snapshot "foo", then write pattern 2 on top.
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
# The inactive L2 table remains at its old offset; make the entry at
# $l2_offset_after_snapshot point to 0x40000 (= $l2_offset).
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
                      "\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
# Same sequence as in the previous case: print feature bits, write,
# print, repair, print, write again, print.
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img -r all
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Check data: the active layer must hold pattern 4; after applying
# snapshot "foo" the image must hold pattern 1 again.
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io

echo
echo "=== Testing overlap while COW is in flight ==="
echo
# compat=0.10 is required in order to make the following discard actually
# unallocate the sector rather than make it a zero sector - we want COW, after
# all.
IMGOPTS='compat=0.10' _make_test_img 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Discard the first cluster. This cluster will soon enough be reallocated and
# used for COW.
$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" | _filter_qemu_io
# Now, corrupt the image by marking the second L2 table cluster as free.
# (131084 = $rb_offset + 12; the offset is hard-coded rather than
# computed from $rb_offset.)
poke_file "$TEST_IMG" '131084' "\x00\x00" # 0x2000c
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table. Finally, resume the COW write and see it fail (but not crash).
# Feed qemu-io a command script on stdin: open the image through the
# blkdebug driver, set breakpoint "0" on cow_read, start an asynchronous
# 1k write, wait until the breakpoint is hit, issue a second write to
# the next 64k, and finally resume the suspended request.
echo "open -o file.driver=blkdebug $TEST_IMG
break cow_read 0
aio_write 0k 1k
wait_break 0
write 64k 64k
resume 0" | $QEMU_IO | _filter_qemu_io

echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# Create L1/L2
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Zero the first two bytes of the refcount block at $rb_offset, then
# write to a second cluster.
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing unaligned L1 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
# aligned or not does not matter
# (the entry written here is 0x8000000000042a00, i.e. it is not a
# multiple of the 64k used for the offsets above)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

# Test how well zero cluster expansion can cope with this: same
# corruption on a fresh image, followed by a downgrade to compat=0.10.
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned L2 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Replace the first L2 entry with the unaligned value 0x8000000000052a00
# and read the cluster it maps.
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing unaligned pre-allocated zero cluster ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Same unaligned L2 entry as in the previous case, but with the lowest
# bit set as well (last byte 0x01 instead of 0x00).
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned reftable entry ==="
echo
_make_test_img 64M
# Replace the first reftable entry with the unaligned value 0x22a00,
# then trigger a write.
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing non-fatal corruption on freeing ==="
echo
# Allocate one cluster, make its L2 entry unaligned, then discard that
# cluster.
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing read-only corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Should only emit a single error message, although the corrupted
# cluster is read twice within the same read-only qemu-io session
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io

echo
echo "=== Testing non-fatal and then fatal corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make the first two L2 entries (at $l2_offset and $l2_offset + 8)
# unaligned.
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$(($l2_offset+8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Should emit two error messages: one for the discard of the first
# cluster and one for the read of the second
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing empty refcount table ==="
echo
_make_test_img 64M
# Zero the first (8-byte) reftable entry before anything is written.
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
echo
_make_test_img 64M
# Unlike the previous case, data is written first so that the L1 and L2
# tables exist before the reftable entry is zeroed.
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Since the first data cluster is already allocated this triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at())
# causing a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block ==="
echo
_make_test_img 64M
# Zero the first eight bytes of the refcount block at $rb_offset.
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Trigger an allocation on the image whose refcount block was just
# zeroed.
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block with compressed write ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The previous write already allocated an L2 table, so now this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing zero refcount table size ==="
echo
_make_test_img 64M
# Zero four bytes at file offset 56.
# NOTE(review): judging by the section title this is the header's
# refcount table size field -- confirm against the qcow2 specification.
poke_file "$TEST_IMG" "56" "\x00\x00\x00\x00"
# stderr is merged into stdout here, and the output is filtered for the
# test directory and image format rather than with _filter_qemu_io.
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
# Repair the image
_check_test_img -r all

echo
echo "=== Testing incorrect refcount table offset ==="
echo
_make_test_img 64M
# Zero eight bytes at file offset 48.
# NOTE(review): judging by the section title this is the header's
# refcount table offset field -- confirm against the qcow2 specification.
poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing dirty corrupt image ==="
echo

_make_test_img 64M

# Let the refblock appear unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Mark the image dirty, thus forcing an automatic check when opening it
# (sets the lowest bit of the 8-byte value at file offset 72)
poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt

echo '--- Repairing ---'

# The actual repair should have happened (because of the dirty bit),
# but some cleanup may have failed (like freeing the old reftable)
# because the image was already marked corrupt by that point
_check_test_img -r all

echo
echo "=== Writing to an unaligned preallocated zero cluster ==="
echo

_make_test_img 64M

# Allocate the L2 table
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
# Pretend there is a preallocated zero cluster somewhere inside the
# image header (the entry points to the unaligned offset 0x2a00)
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# Let's write to it!
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo '--- Repairing ---'
_check_test_img -r all

echo
echo '=== Discarding with an unaligned refblock ==='
echo

_make_test_img 64M

$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$(($rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Now try to discard something that will be submitted as two requests
# (main part + tail)
# NOTE(review): unlike the other qemu-io calls, this output is not piped
# through _filter_qemu_io -- confirm that this is intentional.
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Discarding an out-of-bounds refblock ==="
echo

_make_test_img 64M

# Pretend there's a refblock really up high: the second reftable entry
# (at $rt_offset + 8) is set to 0x00ffffff00000000
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Let's try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
# Attempt to shrink the prepared image to 32M.
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the covering refblock has a valid index in the
# reftable)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to point to the third refblock
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Same as in the previous case: the second reftable entry points to
# 0x10_0000_0000 (64G)
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock (third
# reftable entry, at $rt_offset + 16, set to 0x200)
poke_file "$TEST_IMG" "$(($rt_offset+16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
# Zero the first reftable entry, then drive a full QEMU process over
# QMP on stdin: negotiate capabilities, issue a 512-byte write through
# the human monitor's qemu-io command, and quit.
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp | _filter_qemu_io

echo
echo "=== Testing incoming inactive corrupted image ==="
echo

_make_test_img 64M
# Create an unaligned L1 entry, so qemu will signal a corruption when
# reading from the covered area
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"

# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
# stderr is merged into stdout, and "Input/output error" lines are
# dropped by _filter_io_error.
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive',
                        'driver': 'qcow2',
                        'file': {
                            'driver': 'file',
                            'filename': '$TEST_IMG'
                        }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp | _filter_qemu_io | _filter_io_error

echo
# Image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'

# success, all done
echo "*** done"
# Quoted so that a test name containing whitespace or glob characters
# removes exactly one file.
rm -f "$seq.full"
# Read by the exit trap installed at the top of the script.
status=0