#!/usr/bin/env bash
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# creator
owner=mreitz@redhat.com

# Quote $0 so that a script path containing whitespace or glob
# characters is not split/expanded before basename sees it
seq="$(basename "$0")"
echo "QA output created by $seq"

status=1	# failure is the default!

# Exit hook: remove the test image.  The trap below runs it on exit and
# on signals 1, 2, 3 and 15, then exits with the current $status.
_cleanup()
{
    _cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15

# stdin -> stdout filter that deletes every line containing
# "Input/output error".
# The error line may be printed before or after an event in random
# order, so it is masked out for the specific test that can trigger
# this nondeterminism.
_filter_io_error()
{
    sed '/Input\/output error/d'
}

# get standard environment, filters and checks
. ./common.rc
. ./common.filter

# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file
_supported_os Linux
# These tests only work for compat=1.1 images without an external
# data file with refcount_bits=16
_unsupported_imgopts 'compat=0.10' data_file \
    'refcount_bits=\([^1]\|.\([^6]\|$\)\)'

# The repair process will create a large file - so check for availability first
_require_large_file 64G

# Offsets of the metadata structures that the test cases below patch
# directly in the image file
rt_offset=65536  # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)

# qemu-io "open" commands, passed via -c before the actual I/O command
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"

echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# The corrupt bit should not be set anyway
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# The corrupt bit must now be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# This information should be available through qemu-img info
_img_info --format-specific

# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
                                            | _filter_testdir \
                                            | _filter_imgfmt

# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)

echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Allocate L2 table
truncate -s "$((l2_offset + 65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used
poke_file "$TEST_IMG" "$((rb_offset + 8))" "\x00\x01"
# Redirect new data cluster onto refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to fix it
_check_test_img -r all

# The corrupt bit should be cleared
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Check whether it has really been fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
# The inactive L2 table remains at its old offset
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
                      "\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img -r all
# After the repair: the header flags are dumped, another write is
# attempted, and the flags are dumped once more
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep 'incompatible_features'
$QEMU_IO -c "$OPEN_RW" -c 'write -P 4 0 512' \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep 'incompatible_features'

# Verify the data: pattern 4 in the active state, pattern 1 once
# snapshot "foo" has been applied
$QEMU_IO -c "$OPEN_RO" -c 'read -P 4 0 512' \
    | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c 'read -P 1 0 512' \
    | _filter_qemu_io

echo
echo '=== Testing overlap while COW is in flight ==='
echo
BACKING_IMG="${TEST_IMG}.base"
TEST_IMG="$BACKING_IMG" _make_test_img 1G

$QEMU_IO -c "write 0k 64k" "$BACKING_IMG" \
    | _filter_qemu_io

# We want COW, so the discard below has to really unallocate the sector
# instead of turning it into a zero sector - that requires compat=0.10.
_make_test_img -o "compat=0.10" -b "$BACKING_IMG" 1G
# Two clusters are written; the second one forces an L2 table to be
# created after the first data cluster.
$QEMU_IO -c "write 0k 64k" -c "write 512M 64k" "$TEST_IMG" \
    | _filter_qemu_io
# The first cluster is discarded; it will be reallocated shortly and
# used for COW.
$QEMU_IO -c "discard 0k 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Corrupt the image: mark the second L2 table cluster as free.
poke_file "$TEST_IMG" 131084 '\x00\x00' # 0x2000c
# Next: begin a write that needs COW and stop it right before the read
# is done; then write anything to any unallocated cluster, which
# triggers the corruption prevention because it attempts to overwrite
# the second L2 table. Finally the COW write is resumed and is expected
# to fail (but not crash).
# Feed the command sequence to qemu-io on stdin, one command per line
printf '%s\n' \
    "open -o file.driver=blkdebug $TEST_IMG" \
    'break cow_read 0' \
    'aio_write 0k 1k' \
    'wait_break 0' \
    'write 64k 64k' \
    'resume 0' \
    | $QEMU_IO \
    | _filter_qemu_io

echo
echo '=== Testing unallocated image header ==='
echo
_make_test_img 64M
# The first write creates L1/L2
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" '\x00\x00'
$QEMU_IO -c 'write 64k 64k' "$TEST_IMG" \
    | _filter_qemu_io

echo
echo '=== Testing unaligned L1 entry ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io
# The entry gets masked with ~(512 - 1) = ~0x1ff, so it makes no
# difference whether its lower 9 bits are aligned
poke_file "$TEST_IMG" "$l1_offset" '\x80\x00\x00\x00\x00\x04\x2a\x00'
$QEMU_IO -c 'read 0 64k' "$TEST_IMG" \
    | _filter_qemu_io

# See how zero cluster expansion copes with the same corruption
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" '\x80\x00\x00\x00\x00\x04\x2a\x00'
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo '=== Testing unaligned L2 entry ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" '\x80\x00\x00\x00\x00\x05\x2a\x00'
$QEMU_IO -c 'read 0 64k' "$TEST_IMG" \
    | _filter_qemu_io

echo
echo '=== Testing unaligned pre-allocated zero cluster ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" '\x80\x00\x00\x00\x00\x05\x2a\x01'
# Amending to compat=0.10 performs zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo '=== Testing unaligned reftable entry ==='
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" '\x00\x00\x00\x00\x00\x02\x2a\x00'
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io

echo
echo '=== Testing non-fatal corruption on freeing ==='
echo
# Byte patterns poked into the image by the cases below
unaligned_l2_entry='\x80\x00\x00\x00\x00\x05\x2a\x00'
unaligned_l2_entry_2='\x80\x00\x00\x00\x00\x06\x2a\x00'
zeroed_entry='\x00\x00\x00\x00\x00\x00\x00\x00'

_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "$unaligned_l2_entry"
$QEMU_IO -c 'discard 0 64k' "$TEST_IMG" \
    | _filter_qemu_io

echo
echo '=== Testing read-only corruption report ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "$unaligned_l2_entry"
# Two reads, but only one error message is expected
$QEMU_IO -c "$OPEN_RO" -c 'read 0 64k' -c 'read 0 64k' \
    | _filter_qemu_io

echo
echo '=== Testing non-fatal and then fatal corruption report ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 0 128k' "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "$unaligned_l2_entry"
poke_file "$TEST_IMG" "$((l2_offset + 8))" "$unaligned_l2_entry_2"
# Two error messages are expected here
$QEMU_IO -c 'discard 0 64k' -c 'read 64k 64k' "$TEST_IMG" \
    | _filter_qemu_io

echo
echo '=== Testing empty refcount table ==='
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "$zeroed_entry"
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io
# Now repair the image
_check_test_img -r all

echo
echo '=== Testing empty refcount table with valid L1 and L2 tables ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "$zeroed_entry"
# The first data cluster is already allocated, so this write triggers
# an allocation with an explicit offset (using
# qcow2_alloc_clusters_at()), which causes a refcount block to be
# allocated at offset 0
$QEMU_IO -c 'write 0 128k' "$TEST_IMG" \
    | _filter_qemu_io
# Now repair the image
_check_test_img -r all

echo
echo '=== Testing empty refcount block ==='
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rb_offset" "$zeroed_entry"
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io
# Now repair the image
_check_test_img -r all

echo
echo '=== Testing empty refcount block with compressed write ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 64k 64k' "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" '\x00\x00\x00\x00\x00\x00\x00\x00'
# An L2 table was already allocated by the previous write, so this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c 'write -c 0k 64k' "$TEST_IMG" \
    | _filter_qemu_io
# Now repair the image
_check_test_img -r all

echo
echo '=== Testing zero refcount table size ==='
echo
_make_test_img 64M
poke_file "$TEST_IMG" 56 '\x00\x00\x00\x00'
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" 2>&1 \
    | _filter_testdir \
    | _filter_imgfmt
# Now repair the image
_check_test_img -r all

echo
echo '=== Testing incorrect refcount table offset ==='
echo
_make_test_img 64M
poke_file "$TEST_IMG" 48 '\x00\x00\x00\x00\x00\x00\x00\x00'
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io

echo
echo '=== Testing dirty corrupt image ==='
echo

_make_test_img 64M

# Make the refblock look unaligned
poke_file "$TEST_IMG" "$rt_offset" '\x00\x00\x00\x00\xff\xff\x2a\x00'
# Set the dirty flag, which forces an automatic check when the image
# is opened
poke_file "$TEST_IMG" 72 '\x00\x00\x00\x00\x00\x00\x00\x01'
# Opening the image is something qemu should refuse to do
$QEMU_IO -c close "$TEST_IMG" 2>&1 \
    | _filter_testdir \
    | _filter_imgfmt

echo "--- Repairing ---"

# Because of the dirty bit, the actual repair should already have
# happened, but some cleanup (like freeing the old reftable) may have
# failed because the image was already marked corrupt by that point
_check_test_img -r all

echo
echo '=== Writing to an unaligned preallocated zero cluster ==='
echo

_make_test_img 64M

# This allocates the L2 table
$QEMU_IO -c 'write 0 64k' -c 'discard 0 64k' "$TEST_IMG" \
    | _filter_qemu_io
# Fake a preallocated zero cluster located somewhere inside the image
# header
poke_file "$TEST_IMG" "$l2_offset" '\x80\x00\x00\x00\x00\x00\x2a\x01'
# ... and write to it
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" \
    | _filter_qemu_io

echo "--- Repairing ---"
_check_test_img -r all

echo
echo "=== Discarding with an unaligned refblock ==="
echo

_make_test_img 64M

$QEMU_IO -c 'write 0 128k' "$TEST_IMG" \
    | _filter_qemu_io
# Turn our refblock into an unaligned one
poke_file "$TEST_IMG" "$((rt_offset))" '\x00\x00\x00\x00\x00\x00\x2a\x00'
# This discard will be submitted as two requests (main part + tail)
$QEMU_IO -c 'discard 0 65537' "$TEST_IMG"

echo "--- Repairing ---"
# The first repair fails because the corruption prevents the check
# function from double-checking
# (The first invocation uses -q, because otherwise the double-check
# error message appears above the summary for some reason -- so the
# summary is simply hidden)
_check_test_img -q -r all
_check_test_img -r all

echo
echo '=== Discarding an out-of-bounds refblock ==='
echo

_make_test_img 64M

# Fake a refblock located really high up
poke_file "$TEST_IMG" "$((rt_offset + 8))" '\x00\xff\xff\xff\x00\x00\x00\x00'
# Shrinking the qcow2 image makes the block driver try to discard that
# refblock (let's see what happens!)
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo

_make_test_img -o 'refcount_bits=1' 64M

# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the covering refblock has a valid index in the
# reftable)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to end up in the range covered by the third
# refblock
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo

_make_test_img -o 'refcount_bits=1' 64M

# Same as above
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock
poke_file "$TEST_IMG" "$((rt_offset + 16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Drive QEMU through QMP on stdio: write to the image, then quit
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp | _filter_qemu_io

echo
echo "=== Testing incoming inactive corrupted image ==="
echo

_make_test_img 64M
# Create an unaligned L1 entry, so qemu will signal a corruption when
# reading from the covered area
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"

# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive',
                        'driver': 'qcow2',
                        'file': {
                            'driver': 'file',
                            'filename': '$TEST_IMG'
                        }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp | _filter_qemu_io | _filter_io_error

echo
# Image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'

# success, all done
echo "*** done"
# Quoted, and guarded with "--", so that a test name containing
# whitespace, glob characters or a leading dash cannot be split,
# expanded or parsed as an option
rm -f -- "$seq.full"
status=0