#!/usr/bin/env bash
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# creator
owner=mreitz@redhat.com

# "$0" is quoted so that a script path containing whitespace or glob
# characters is handed to basename as a single argument.
seq="$(basename "$0")"
echo "QA output created by $seq"

status=1	# failure is the default!

# Exit-time cleanup hook; delegates to _cleanup_test_img (not defined in
# this file -- presumably provided by the sourced common.rc).
_cleanup()
{
    _cleanup_test_img
}
# Run _cleanup and leave with $status on normal exit (0) and on signals
# 1, 2, 3 and 15.  \$status is escaped so that it is expanded when the
# trap fires, not when it is installed.
trap "_cleanup; exit \$status" 0 1 2 3 15

# Sometimes the error line might be dumped before/after an event
# randomly.  Mask it out for specific test that may trigger this
# uncertainty for current test for now.
#
# Reads stdin, writes stdout; every line containing "Input/output error"
# is deleted.
_filter_io_error()
{
    sed '/Input\/output error/d'
}

# get standard environment, filters and checks
. ./common.rc
. ./common.filter

# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file
_supported_os Linux

# The repair process will create a large file - so check for availability first
_require_large_file 64G

# Byte offsets of the metadata structures poked below.  Going by how the
# test cases use them: rt = refcount table, rb = refcount block,
# l1/l2 = L1/L2 table.
rt_offset=65536  # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)

IMGOPTS="compat=1.1"

# qemu-io "open" command lines, passed later via -c "$OPEN_RW" / "$OPEN_RO"
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"

echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# The corrupt bit should not be set anyway
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# The corrupt bit must now be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# This information should be available through qemu-img info
_img_info --format-specific

# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
                                            | _filter_testdir \
                                            | _filter_imgfmt

# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# We could now try to fix the image, but this
# would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)

echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Grow the file so that it contains a cluster for the L2 table, then hook
# that table into the first L1 entry
truncate -s "$((l2_offset + 65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Flag the cluster as in use
poke_file "$TEST_IMG" "$((rb_offset + 8))" "\x00\x01"
# Point the new data cluster at the refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
# Header dump before and after a write attempt
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

# Attempt a repair
_check_test_img -r all

# Afterwards the corrupt bit is expected to be clear
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

# Verify that the repair really worked: write again and re-dump the header
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" \
    | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" \
    | _filter_qemu_io
# The old offset still holds the (now inactive) L2 table
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" "\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features
_check_test_img -r all
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

# Verify the data, both in the active layer and after reverting to the
# snapshot
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" \
    | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" \
    | _filter_qemu_io

echo
echo "=== Testing overlap while COW is in flight ==="
echo
BACKING_IMG="$TEST_IMG.base"
TEST_IMG="$BACKING_IMG" _make_test_img 1G

$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" \
    | _filter_qemu_io

# We want COW, so the discard further down has to really unallocate the
# sector instead of turning it into a zero sector - that requires
# compat=0.10.
IMGOPTS='compat=0.10' _make_test_img -b "$BACKING_IMG" 1G
# Two clusters are written; the second one forces an L2 table to be created
# behind the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" \
    | _filter_qemu_io
# Drop the first cluster again.  It will shortly be reallocated and used
# for COW.
$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" \
    | _filter_qemu_io
# Corrupt the image: mark the second L2 table cluster as free.
poke_file "$TEST_IMG" '131084' "\x00\x00" # 0x2000c
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table.  Finally, resume the COW write and see it fail (but not crash).
# Scripted qemu-io session fed on stdin: the aio_write is parked at the
# cow_read breakpoint, a second write is issued meanwhile, and the parked
# request is then resumed.
echo "open -o file.driver=blkdebug $TEST_IMG
break cow_read 0
aio_write 0k 1k
wait_break 0
write 64k 64k
resume 0" \
    | $QEMU_IO \
    | _filter_qemu_io

echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# This write brings L1/L2 into existence
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo
echo "=== Testing unaligned L1 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# The entry gets masked with ~(512 - 1) = ~0x1ff, hence the alignment of the
# lower 9 bits is irrelevant
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

# See how zero cluster expansion deals with the same corruption
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned L2 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo
echo "=== Testing unaligned pre-allocated zero cluster ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# Amending to compat=0.10 performs zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned reftable entry ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo
echo "=== Testing non-fatal corruption on freeing ==="
echo
# Fresh image with one data cluster; its L2 entry is then made unaligned
# and the cluster discarded.
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo
echo "=== Testing read-only corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Two reads, yet only one error message is expected
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" \
    | _filter_qemu_io

echo
echo "=== Testing non-fatal and then fatal corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" \
    | _filter_qemu_io
# Make the first two L2 entries unaligned
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$((l2_offset + 8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Two error messages are expected here
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo
echo "=== Testing empty refcount table ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Have the image repaired
_check_test_img -r all

echo
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The first data cluster already being allocated, this write triggers an
# allocation with an explicit offset (via qcow2_alloc_clusters_at()),
# which causes a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" \
    | _filter_qemu_io
# Have the image repaired
_check_test_img -r all

echo
echo "=== Testing empty refcount block ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Write to the image, then have it repaired
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
_check_test_img -r all

echo
echo "=== Testing empty refcount block with compressed write ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# An L2 table exists already thanks to the write above, so the following
# write attempts to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Have the image repaired
_check_test_img -r all

echo
echo "=== Testing zero refcount table size ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "56" "\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 \
    | _filter_testdir \
    | _filter_imgfmt
# Have the image repaired
_check_test_img -r all

echo
echo "=== Testing incorrect refcount table offset ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo
echo "=== Testing dirty corrupt image ==="
echo

_make_test_img 64M

# Make the refblock look unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Set the dirty flag, so that opening the image forces an automatic check
poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
# Try to open the image; qemu is expected to refuse
$QEMU_IO -c close "$TEST_IMG" 2>&1 \
    | _filter_testdir \
    | _filter_imgfmt

echo '--- Repairing ---'

# Because of the dirty bit the repair itself should already have taken
# place; however, some cleanup (such as freeing the old reftable) may have
# failed since the image had been marked corrupt by then
_check_test_img -r all

echo
echo "=== Writing to an unaligned preallocated zero cluster ==="
echo

_make_test_img 64M

# Get the L2 table allocated
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Fake a preallocated zero cluster located somewhere inside the image
# header
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# ... and write to it
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

echo '--- Repairing ---'
_check_test_img -r all

echo
echo '=== Discarding with an unaligned refblock ==='
echo

_make_test_img 64M

$QEMU_IO -c "write 0 128k" "$TEST_IMG" \
    | _filter_qemu_io
# Turn our refblock into an unaligned one
poke_file "$TEST_IMG" "$((rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Discard a range that gets submitted as two requests (main part + tail);
# the output is deliberately left unfiltered
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"

echo '--- Repairing ---'
# The first repair fails, as the corruption keeps the check function from
# double-checking.
# (The first invocation uses -q: otherwise the double-check error message
# shows up above the summary for some reason -- so the summary is simply
# hidden)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Discarding an out-of-bounds refblock ==="
echo

_make_test_img 64M

# Fake a refblock at a really high offset
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Let's try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
# Shrink attempt on the image prepared just before
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# The virtual size is expected to be unchanged
_img_info \
    | grep 'virtual size'
# The check should nevertheless pass: the "partial" resize has already
# overwritten the refblocks past the end
_check_test_img -r all
# Second attempt
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info \
    | grep 'virtual size'

echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Fake a refblock at a place that no refblock covers (although the
# covering refblock does have a valid index in the reftable).
# A single refblock covers 65536 * 8 * 65536 = 32 GB, therefore pointing to
# 0x10_0000_0000 (64G) means pointing into the third refblock's range
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# The virtual size is expected to be unchanged
_img_info \
    | grep 'virtual size'
# The check should nevertheless pass: the "partial" resize has already
# overwritten the refblocks past the end
_check_test_img -r all
# Second attempt
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info \
    | grep 'virtual size'

echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Same reftable entry as in the previous case
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# This time, however, an unaligned third refblock is actually "created"
poke_file "$TEST_IMG" "$((rt_offset + 16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the
# summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# QMP session on stdin: negotiate capabilities, issue a qemu-io write on
# the node named "drive" through the human monitor, then quit.
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp | _filter_qemu_io

echo
echo "=== Testing incoming inactive corrupted image ==="
echo

_make_test_img 64M
# Create an unaligned L1 entry, so qemu will signal a corruption when
# reading from the covered area
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"

# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive',
                        'driver': 'qcow2',
                        'file': {
                            'driver': 'file',
                            'filename': '$TEST_IMG'
                        }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp | _filter_qemu_io | _filter_io_error

echo
# Image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'

# success, all done
echo "*** done"
# Quoted (and terminated with --) so that the name is always passed to rm
# as exactly one operand, whatever $seq contains.
rm -f -- "$seq.full"
# Picked up by the exit trap as the script's exit code
status=0