#!/usr/bin/env bash
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# creator
owner=mreitz@redhat.com

# Name of this test, derived from the script's file name.  "$0" is quoted so
# that a path containing whitespace is not split into several arguments.
seq="$(basename "$0")"
echo "QA output created by $seq"

status=1	# failure is the default!

# Remove the test image; run from the exit/signal trap below.
_cleanup()
{
    _cleanup_test_img
}
# Clean up and leave with $status on normal exit (0) and on signals 1, 2, 3
# and 15.  \$status is escaped so it is expanded when the trap fires, not
# when it is installed.
trap "_cleanup; exit \$status" 0 1 2 3 15

# Sometimes the error line might be dumped before/after an event
# randomly.  Mask it out for specific test that may trigger this
# uncertainty for current test for now.
#
# Reads stdin and writes it to stdout with every line containing
# "Input/output error" removed.
_filter_io_error()
{
    sed '/Input\/output error/d'
}

# get standard environment, filters and checks
. ./common.rc
. ./common.filter

# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file
_supported_os Linux

# Offsets of the metadata structures inside a freshly created test image
rt_offset=65536  # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)

IMGOPTS="compat=1.1"

# qemu-io "open" commands, passed via -c so that options can be given
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"

echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# The corrupt bit should not be set anyway
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# The corrupt bit must now be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# This information should be available through qemu-img info
_img_info --format-specific

# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
                                            | _filter_testdir \
                                            | _filter_imgfmt

# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)
# Print the incompatible_features line from the test image's header dump.
show_incompatible_features()
{
    $PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
}

echo
echo '=== Testing cluster data reference into refcount block ==='
echo
_make_test_img 64M
# Grow the file so that it contains an L2 table, and hook that table into L1
truncate -s "$((l2_offset + 65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Give the new cluster a refcount so that it counts as used
poke_file "$TEST_IMG" "$((rb_offset + 8))" "\x00\x01"
# Point the first data cluster at the refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
show_incompatible_features
$QEMU_IO -c "$OPEN_RW" -c 'write -P 0x2a 0 512' | _filter_qemu_io
show_incompatible_features

# Attempt a repair
_check_test_img -r all

# Afterwards the corrupt bit should be gone
show_incompatible_features

# Make sure the image is really fixed: writing must not set the bit again
$QEMU_IO -c "$OPEN_RW" -c 'write -P 0x2a 0 512' | _filter_qemu_io
show_incompatible_features

echo
echo '=== Testing cluster data reference into inactive L2 table ==='
echo
_make_test_img 64M
$QEMU_IO -c "$OPEN_RW" -c 'write -P 1 0 512' | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c 'write -P 2 0 512' | _filter_qemu_io
# The inactive L2 table stays where it was before the snapshot
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
    "\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
show_incompatible_features
$QEMU_IO -c "$OPEN_RW" -c 'write -P 3 0 512' | _filter_qemu_io
show_incompatible_features
_check_test_img -r all
show_incompatible_features
$QEMU_IO -c "$OPEN_RW" -c 'write -P 4 0 512' | _filter_qemu_io
show_incompatible_features

# Verify the data, both in the active layer and after reverting to "foo"
$QEMU_IO -c "$OPEN_RO" -c 'read -P 4 0 512' | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c 'read -P 1 0 512' | _filter_qemu_io

echo
echo '=== Testing overlap while COW is in flight ==='
echo
BACKING_IMG=$TEST_IMG.base
TEST_IMG=$BACKING_IMG _make_test_img 1G

$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io

# compat=0.10 is needed so that the discard below really unallocates the
# sector instead of turning it into a zero sector - COW is what we are after.
IMGOPTS='compat=0.10' _make_test_img -b "$BACKING_IMG" 1G
# Write two clusters; the second one forces an L2 table to be created after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Discard the first cluster.  It will be reallocated shortly and used for
# COW.
$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" | _filter_qemu_io
# Corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" 131084 "\x00\x00" # 0x2000c
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table. Finally, resume the COW write and see it fail (but not crash).
# Feed a qemu-io command script on stdin: open the image through blkdebug,
# break on cow_read, start an aio_write, wait for the breakpoint, issue
# another write, then resume the suspended request.
printf '%s\n' \
    "open -o file.driver=blkdebug $TEST_IMG" \
    'break cow_read 0' \
    'aio_write 0k 1k' \
    'wait_break 0' \
    'write 64k 64k' \
    'resume 0' \
    | $QEMU_IO | _filter_qemu_io

echo
echo '=== Testing unallocated image header ==='
echo
_make_test_img 64M
# Have L1/L2 created
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c 'write 64k 64k' "$TEST_IMG" | _filter_qemu_io

echo
echo '=== Testing unaligned L1 entry ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
# The entry is masked with ~(512 - 1) = ~0x1ff, so the alignment of the
# lower 9 bits is irrelevant
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c 'read 0 64k' "$TEST_IMG" | _filter_qemu_io

# See how zero cluster expansion copes with the same corruption
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo '=== Testing unaligned L2 entry ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c 'read 0 64k' "$TEST_IMG" | _filter_qemu_io

echo
echo '=== Testing unaligned pre-allocated zero cluster ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# Amending to compat=0.10 triggers zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo '=== Testing unaligned reftable entry ==='
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io

echo
echo '=== Testing non-fatal corruption on freeing ==='
echo
# Non-fatal corruption on freeing: discard a cluster whose L2 entry is
# unaligned
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c 'discard 0 64k' "$TEST_IMG" | _filter_qemu_io

echo
echo '=== Testing read-only corruption report ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Only one error message is expected, although the read is issued twice
$QEMU_IO -c "$OPEN_RO" -c 'read 0 64k' -c 'read 0 64k' | _filter_qemu_io

echo
echo '=== Testing non-fatal and then fatal corruption report ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 0 128k' "$TEST_IMG" | _filter_qemu_io
# Make the first two L2 entries unaligned
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$((l2_offset + 8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Two error messages are expected
$QEMU_IO -c 'discard 0 64k' -c 'read 64k 64k' "$TEST_IMG" | _filter_qemu_io

echo
echo '=== Testing empty refcount table ==='
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo '=== Testing empty refcount table with valid L1 and L2 tables ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The first data cluster is already allocated, so this triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at()),
# which causes a refcount block to be allocated at offset 0
$QEMU_IO -c 'write 0 128k' "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo '=== Testing empty refcount block ==='
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Empty refcount block test: write to the image, then repair it
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
_check_test_img -r all

echo
echo '=== Testing empty refcount block with compressed write ==='
echo
_make_test_img 64M
$QEMU_IO -c 'write 64k 64k' "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# An L2 table was already allocated by the write above, so this write
# will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c 'write -c 0k 64k' "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo '=== Testing zero refcount table size ==='
echo
_make_test_img 64M
poke_file "$TEST_IMG" 56 "\x00\x00\x00\x00"
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
# Repair the image
_check_test_img -r all

echo
echo '=== Testing incorrect refcount table offset ==='
echo
_make_test_img 64M
poke_file "$TEST_IMG" 48 "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io

echo
echo '=== Testing dirty corrupt image ==='
echo

_make_test_img 64M

# Make the refblock look unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Set the dirty flag so that opening the image forces an automatic check
poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt

echo '--- Repairing ---'

# Because of the dirty bit the actual repair should already have taken
# place, but some cleanup (like freeing the old reftable) may have failed
# since the image was already marked corrupt by then
_check_test_img -r all

echo
echo '=== Writing to an unaligned preallocated zero cluster ==='
echo

_make_test_img 64M

# Get the L2 table allocated
$QEMU_IO -c 'write 0 64k' -c 'discard 0 64k' "$TEST_IMG" | _filter_qemu_io
# Pretend that a preallocated zero cluster lives somewhere inside the
# image header
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# ... and write to it
$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io

echo '--- Repairing ---'
_check_test_img -r all

echo
echo '=== Discarding with an unaligned refblock ==='
echo

_make_test_img 64M

$QEMU_IO -c 'write 0 128k' "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Discard a range that is submitted as two requests (main part + tail)
$QEMU_IO -c 'discard 0 65537' "$TEST_IMG"

echo '--- Repairing ---'
# The first repair fails because the corruption keeps the check function
# from double-checking.
# (-q is used for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so the summary is simply hidden)
_check_test_img -q -r all
_check_test_img -r all

echo
echo '=== Discarding an out-of-bounds refblock ==='
echo

_make_test_img 64M

# Pretend there's a refblock really up high
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Let's try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
# Shrink the image so that the block driver has to deal with the refblock
# the reftable was made to point to
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the covering refblock has a valid index in the
# reftable)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to point to the third refblock
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Same as above
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock
poke_file "$TEST_IMG" "$((rt_offset + 16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Drive QEMU over QMP on stdio: issue a qemu-io write to the corrupted image
# through the human monitor, then quit
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp | _filter_qemu_io

echo
echo "=== Testing incoming inactive corrupted image ==="
echo

_make_test_img 64M
# Create an unaligned L1 entry, so qemu will signal a corruption when
# reading from the covered area
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"

# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive',
                        'driver': 'qcow2',
                        'file': {
                            'driver': 'file',
                            'filename': '$TEST_IMG'
                        }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp | _filter_qemu_io | _filter_io_error

echo
# Image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'

# success, all done
echo "*** done"
# Quoted so that a test name containing whitespace or glob characters
# cannot make rm act on unrelated paths
rm -f "$seq.full"
# Picked up as the exit code by the trap installed at the top of the script
status=0