#!/usr/bin/env bash
# group: rw auto quick
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# NOTE(review): $QEMU_IO, $QEMU_IMG, $QEMU and $PYTHON are deliberately left
# unquoted throughout, exactly as in the original script; presumably the test
# harness may set them to multi-word commands -- confirm before quoting them.

# creator
owner=hreitz@redhat.com

seq="$(basename "$0")"
echo "QA output created by $seq"

status=1	# failure is the default!

# Remove the test image; invoked from the exit trap below.
_cleanup()
{
    _cleanup_test_img
}
# Always clean up and exit with $status (evaluated at exit time, hence the
# escaped dollar sign), on normal exit as well as on HUP/INT/QUIT/TERM.
trap "_cleanup; exit \$status" 0 1 2 3 15

# The "Input/output error" line may be printed before or after an event in
# a non-deterministic order.  Drop such lines from stdin so the output of
# the tests that can trigger this stays stable.
_filter_io_error()
{
    sed '/Input\/output error/d'
}

# get standard environment, filters and checks
. ./common.rc
. ./common.filter

# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file fuse
_supported_os Linux
# These tests only work for compat=1.1 images without an external
# data file with refcount_bits=16
_unsupported_imgopts 'compat=0.10' data_file \
    'refcount_bits=\([^1]\|.\([^6]\|$\)\)'

# The repair process will create a large file - so check for availability first
_require_large_file 64G

rt_offset=65536  # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)

OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"

echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# The corrupt bit should not be set anyway
_qcow2_dump_header | grep incompatible_features

# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# The corrupt bit must now be set
_qcow2_dump_header | grep incompatible_features

# This information should be available through qemu-img info
_img_info --format-specific

# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 \
    | _filter_qemu_io \
    | _filter_testdir \
    | _filter_imgfmt

# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)

echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Allocate L2 table
truncate -s "$(($l2_offset+65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used
poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01"
# Redirect new data cluster onto refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
_qcow2_dump_header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
_qcow2_dump_header | grep incompatible_features

# Try to fix it
_check_test_img -r all

# The corrupt bit should be cleared
_qcow2_dump_header | grep incompatible_features

# Look if it's really really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
_qcow2_dump_header | grep incompatible_features

echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
# The inactive L2 table remains at its old offset
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
    "\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
_qcow2_dump_header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
_qcow2_dump_header | grep incompatible_features
_check_test_img -r all
_qcow2_dump_header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
_qcow2_dump_header | grep incompatible_features

# Check data
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io

echo
echo "=== Testing overlap while COW is in flight ==="
echo
BACKING_IMG=$TEST_IMG.base
TEST_IMG=$BACKING_IMG _make_test_img 1G

$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io

_make_test_img -b "$BACKING_IMG" -F "$IMGFMT" 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Free the first cluster. This cluster will soon enough be reallocated and
# used for COW.
poke_file "$TEST_IMG" "$l2_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
poke_file "$TEST_IMG" "$(($rb_offset+10))" "\x00\x00"
# Now, corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" "$(($rb_offset+12))" "\x00\x00"
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table. Finally, resume the COW write and see it fail (but not crash).
echo "open -o file.driver=blkdebug $TEST_IMG
break cow_read 0
aio_write 0k 1k
wait_break 0
write 64k 64k
resume 0" | $QEMU_IO | _filter_qemu_io

echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# Create L1/L2
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing unaligned L1 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
# aligned or not does not matter
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

# Test how well zero cluster expansion can cope with this
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned L2 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing unaligned pre-allocated zero cluster ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned reftable entry ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing non-fatal corruption on freeing ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing read-only corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Should only emit a single error message
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io

echo
echo "=== Testing non-fatal and then fatal corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$(($l2_offset+8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Should emit two error messages
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing empty refcount table ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Since the first data cluster is already allocated this triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at())
# causing a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block with compressed write ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The previous write already allocated an L2 table, so now this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing zero refcount table size ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "56" "\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
# Repair the image
_check_test_img -r all

echo
echo "=== Testing incorrect refcount table offset ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing dirty corrupt image ==="
echo

_make_test_img 64M

# Let the refblock appear unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Mark the image dirty, thus forcing an automatic check when opening it
$PYTHON qcow2.py "$TEST_IMG" set-feature-bit incompatible 0
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt

echo '--- Repairing ---'

# The actual repair should have happened (because of the dirty bit),
# but some cleanup may have failed (like freeing the old reftable)
# because the image was already marked corrupt by that point
_check_test_img -r all

echo
echo "=== Writing to an unaligned preallocated zero cluster ==="
echo

_make_test_img 64M

# Allocate the L2 table
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
# Pretend there is a preallocated zero cluster somewhere inside the
# image header
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# Let's write to it!
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo '--- Repairing ---'
_check_test_img -r all

echo
echo '=== Discarding with an unaligned refblock ==='
echo

_make_test_img 64M

$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Now try to discard something that will be submitted as two requests
# (main part + tail)
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Discarding an out-of-bounds refblock ==="
echo

_make_test_img 64M

# Pretend there's a refblock really up high
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Let's try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo

_make_test_img -o 'refcount_bits=1' 64M

# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the covering refblock has a valid index in the
# reftable)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to point to the third refblock
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo

_make_test_img -o 'refcount_bits=1' 64M

# Same as above
poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock
poke_file "$TEST_IMG" "$(($rt_offset+16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp | _filter_qemu_io

echo
echo "=== Testing incoming inactive corrupted image ==="
echo

_make_test_img 64M
# Create an unaligned L1 entry, so qemu will signal a corruption when
# reading from the covered area
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"

# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive',
                        'driver': 'qcow2',
                        'file': {
                            'driver': 'file',
                            'filename': '$TEST_IMG'
                        }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp | _filter_qemu_io | _filter_io_error

echo
# Image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'

# success, all done
echo "*** done"
rm -f "$seq.full"
status=0