#!/usr/bin/env bash
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# creator
owner=mreitz@redhat.com

# Quote "$0" so that a script path containing whitespace is not word-split
seq="$(basename "$0")"
echo "QA output created by $seq"

status=1	# failure is the default!

# Clean-up hook run by the trap below; delegates to _cleanup_test_img
# (not defined in this file -- presumably provided by common.rc).
_cleanup()
{
    _cleanup_test_img
}
# Run the clean-up and exit with $status on normal exit (0) and on signals
# 1, 2, 3 and 15.  The "\$" defers the expansion of $status until the trap
# actually fires, so the final "status=0" at the end of the script counts.
trap "_cleanup; exit \$status" 0 1 2 3 15

# Sometimes the error line might be dumped before/after an event
# randomly.  Mask it out for specific test that may trigger this
# uncertainty for current test for now.
#
# Reads stdin, writes stdout; drops every line containing
# "Input/output error".
_filter_io_error()
{
    sed '/Input\/output error/d'
}

# get standard environment, filters and checks
. ./common.rc
. ./common.filter

# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file
_supported_os Linux

# The repair process will create a large file - so check for availability first
_require_large_file 64G

rt_offset=65536  # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)

IMGOPTS="compat=1.1"

OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"

echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
# The low bytes 0x030000 of the entry equal $l1_offset.
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# The corrupt bit should not be set anyway
# NOTE(review): $PYTHON and $QEMU_IO are left unquoted as in the original --
# presumably they may expand to a command plus arguments; confirm before
# quoting them.
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# The corrupt bit must now be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# This information should be available through qemu-img info
_img_info --format-specific

# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
                                            | _filter_testdir \
                                            | _filter_imgfmt

# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)
# Print the "incompatible_features" line of the test image's header dump.
# Used after every step below to show whether the corrupt bit is set.
_dump_incompat_features()
{
    $PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
}

echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M

# Allocate the L2 table: extend the file by one 65536-byte cluster past
# $l2_offset and point the first L1 entry at it
truncate -s "$((l2_offset + 65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used
poke_file "$TEST_IMG" "$((rb_offset + 8))" "\x00\x01"
# Redirect new data cluster onto refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"

_check_test_img
_dump_incompat_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
_dump_incompat_features

# Try to fix it
_check_test_img -r all

# The corrupt bit should be cleared
_dump_incompat_features

# Look if it's really really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
_dump_incompat_features

echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M

# Write, snapshot, write again
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io

# The inactive L2 table remains at its old offset
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" "\x80\x00\x00\x00\x00\x04\x00\x00"

_check_test_img
_dump_incompat_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
_dump_incompat_features

_check_test_img -r all
_dump_incompat_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
_dump_incompat_features

# Check data
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
echo
echo "=== Testing overlap while COW is in flight ==="
echo
# These must be two separate statements: BACKING_IMG has to persist (it is
# used again below), while TEST_IMG is overridden for this one call only.
BACKING_IMG=$TEST_IMG.base
TEST_IMG=$BACKING_IMG _make_test_img 1G

$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io

# compat=0.10 is required in order to make the following discard actually
# unallocate the sector rather than make it a zero sector - we want COW, after
# all.
IMGOPTS='compat=0.10' _make_test_img -b "$BACKING_IMG" 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Discard the first cluster. This cluster will soon enough be reallocated and
# used for COW.
$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" | _filter_qemu_io
# Now, corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" '131084' "\x00\x00" # 0x2000c
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table. Finally, resume the COW write and see it fail (but not crash).
#
# The commands are fed to qemu-io on stdin, one command per line.
echo "open -o file.driver=blkdebug $TEST_IMG
break cow_read 0
aio_write 0k 1k
wait_break 0
write 64k 64k
resume 0" | $QEMU_IO | _filter_qemu_io

echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# Create L1/L2
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing unaligned L1 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
# aligned or not does not matter
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

# Test how well zero cluster expansion can cope with this
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned L2 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing unaligned pre-allocated zero cluster ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned reftable entry ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing non-fatal corruption on freeing ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing read-only corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Should only emit a single error message
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io

echo
echo "=== Testing non-fatal and then fatal corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Corrupt the first two L2 entries (8 bytes each)
poke_file "$TEST_IMG" "$l2_offset"        "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$(($l2_offset+8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Should emit two error messages
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing empty refcount table ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Since the first data cluster is already allocated this triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at())
# causing a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block with compressed write ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The previous write already allocated an L2 table, so now this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing zero refcount table size ==="
echo
_make_test_img 64M
# Zero the 4 bytes at header offset 56
poke_file "$TEST_IMG" "56" "\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
# Repair the image
_check_test_img -r all

echo
echo "=== Testing incorrect refcount table offset ==="
echo
_make_test_img 64M
# Zero the 8 bytes at header offset 48
poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing dirty corrupt image ==="
echo

_make_test_img 64M

# Let the refblock appear unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Mark the image dirty, thus forcing an automatic check when opening it
poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt

echo '--- Repairing ---'

# The actual repair should have happened (because of the dirty bit),
# but some cleanup may have failed (like freeing the old reftable)
# because the image was already marked corrupt by that point
_check_test_img -r all

echo
echo "=== Writing to an unaligned preallocated zero cluster ==="
echo

_make_test_img 64M

# Allocate the L2 table
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
# Pretend there is a preallocated zero cluster somewhere inside the
# image header
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# Let's write to it!
# Write to the first cluster of the image prepared just above
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo "--- Repairing ---"
_check_test_img -r all

echo
echo "=== Discarding with an unaligned refblock ==="
echo

_make_test_img 64M

$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$((rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Now try to discard something that will be submitted as two requests
# (main part + tail)
# Note: the output of this invocation is deliberately not filtered
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"

echo "--- Repairing ---"
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Discarding an out-of-bounds refblock ==="
echo

_make_test_img 64M

# Pretend there's a refblock really up high (second reftable entry,
# 8 bytes past the start of the reftable)
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Let's try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
# Attempt the shrink on the image prepared just above
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the covering refblock has a valid index in the
# reftable)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to point to the third refblock
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"

$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Same as above
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock
poke_file "$TEST_IMG" "$(($rt_offset+16))" "\x00\x00\x00\x00\x00\x00\x02\x00"

$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The QMP commands are sent on stdin.  The backslash-newlines inside the
# double quotes are line continuations, so the emitted text is unchanged.
echo "{'execute': 'qmp_capabilities'} \
{'execute': 'human-monitor-command', \
'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}} \
{'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp | _filter_qemu_io

echo
echo "=== Testing incoming inactive corrupted image ==="
echo

_make_test_img 64M
# Create an unaligned L1 entry, so qemu will signal a corruption when
# reading from the covered area
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"

# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
echo "{'execute': 'qmp_capabilities'} \
{'execute': 'human-monitor-command', \
'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}} \
{'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive', 'driver': 'qcow2', 'file': { 'driver': 'file', 'filename': '$TEST_IMG' }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp | _filter_qemu_io | _filter_io_error

echo
# Image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'

# success, all done
echo "*** done"
# Quoted so that a test name containing whitespace cannot be word-split
rm -f "$seq.full"
# Read by the exit trap, which exits with this status
status=0