#!/usr/bin/env bash
# group: rw auto quick
#
# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# creator
owner=hreitz@redhat.com

# "$0" is quoted so that a script path containing whitespace or glob
# characters reaches basename as one single argument
seq="$(basename "$0")"
echo "QA output created by $seq"

status=1 # failure is the default!

# Exit hook: remove the test image(s).  Installed below for normal exit
# (0) and for signals 1, 2, 3 and 15; the script then exits with
# whatever $status holds at that time.
_cleanup()
{
    _cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15

# Sometimes the error line might be dumped before/after an event
# randomly.  Mask it out for the specific test that may trigger this
# uncertainty, for now.
#
# Reads stdin, writes stdout; every line containing
# "Input/output error" is dropped.
_filter_io_error()
{
    sed '/Input\/output error/d'
}

# get standard environment, filters and checks
. ./common.rc
. ./common.filter

# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file fuse
_supported_os Linux
# These tests only work for compat=1.1 images without an external
# data file with refcount_bits=16
_unsupported_imgopts 'compat=0.10' data_file \
    'refcount_bits=\([^1]\|.\([^6]\|$\)\)'

# The repair process will create a large file - so check for availability first
_require_large_file 64G

# Byte offsets of the metadata structures inside a freshly created image
rt_offset=65536  # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)

# qemu-io "open" commands, passed via -c as the first command
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"

echo
echo "=== Testing L2 reference into L1 ==="
echo
_make_test_img 64M
# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# The corrupt bit should not be set anyway
_qcow2_dump_header | grep incompatible_features

# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# The corrupt bit must now be set
_qcow2_dump_header | grep incompatible_features

# This information should be available through qemu-img info
_img_info --format-specific

# Try to open the image R/W (which should fail)
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 \
    | _filter_qemu_io \
    | _filter_testdir \
    | _filter_imgfmt

# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)

echo
echo "=== Testing cluster data reference into refcount block ==="
echo
_make_test_img 64M
# Allocate L2 table
truncate -s "$((l2_offset + 65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used
poke_file "$TEST_IMG" "$((rb_offset + 8))" "\x00\x01"
# Redirect new data cluster onto refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
_qcow2_dump_header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
_qcow2_dump_header | grep incompatible_features

# Try to fix it
_check_test_img -r all

# The corrupt bit should be cleared
_qcow2_dump_header | grep incompatible_features

# Look if it's really really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
_qcow2_dump_header | grep incompatible_features

echo
echo "=== Testing cluster data reference into inactive L2 table ==="
echo
_make_test_img 64M
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
# The inactive L2 table remains at its old offset
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
    "\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
_qcow2_dump_header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
_qcow2_dump_header | grep incompatible_features
_check_test_img -r all
_qcow2_dump_header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
_qcow2_dump_header | grep incompatible_features

# Check data: the active layer must hold pattern 4, and after reverting
# to snapshot "foo" the pattern written before the snapshot (1) must be back
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io

echo
echo "=== Testing overlap while COW is in flight ==="
echo
BACKING_IMG=$TEST_IMG.base
TEST_IMG=$BACKING_IMG _make_test_img 1G

$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io

# "$IMGFMT" is quoted so it is always passed as exactly one argument to -F
_make_test_img -b "$BACKING_IMG" -F "$IMGFMT" 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Free the first cluster. This cluster will soon enough be reallocated and
# used for COW.
poke_file "$TEST_IMG" "$l2_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
poke_file "$TEST_IMG" "$((rb_offset + 10))" "\x00\x00"
# Now, corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" "$((rb_offset + 12))" "\x00\x00"
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table. Finally, resume the COW write and see it fail (but not crash).
# qemu-io command script, fed on stdin one command per line: open the image
# through blkdebug, set a breakpoint on cow_read, start an asynchronous
# write, wait for the breakpoint, issue a second write, then resume
printf '%s\n' \
    "open -o file.driver=blkdebug $TEST_IMG" \
    "break cow_read 0" \
    "aio_write 0k 1k" \
    "wait_break 0" \
    "write 64k 64k" \
    "resume 0" \
    | $QEMU_IO | _filter_qemu_io

echo
echo "=== Testing unallocated image header ==="
echo
_make_test_img 64M
# Create L1/L2
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Zero the first two bytes of the refcount block, then write again
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing unaligned L1 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
# aligned or not does not matter
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

# Test how well zero cluster expansion can cope with this
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned L2 entry ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing unaligned pre-allocated zero cluster ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"

echo
echo "=== Testing unaligned reftable entry ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing non-fatal corruption on freeing ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing read-only corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Should only emit a single error message
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io

echo
echo "=== Testing non-fatal and then fatal corruption report ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Corrupt the first two L2 entries (8 bytes each)
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$((l2_offset + 8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Should emit two error messages
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing empty refcount table ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Since the first data cluster is already allocated this triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at())
# causing a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing empty refcount block with compressed write ==="
echo
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The previous write already allocated an L2 table, so now this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

echo
echo "=== Testing zero refcount table size ==="
echo
_make_test_img 64M
# Zero four bytes at byte offset 56 of the image header
# (presumably the refcount table size field, going by the section title)
poke_file "$TEST_IMG" 56 "\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
# Repair the image
_check_test_img -r all

echo
echo "=== Testing incorrect refcount table offset ==="
echo
_make_test_img 64M
# Zero eight bytes at byte offset 48 of the image header
# (presumably the refcount table offset field, going by the section title)
poke_file "$TEST_IMG" 48 "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo
echo "=== Testing dirty corrupt image ==="
echo

_make_test_img 64M

# Let the refblock appear unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Mark the image dirty, thus forcing an automatic check when opening it
$PYTHON qcow2.py "$TEST_IMG" set-feature-bit incompatible 0
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt

echo '--- Repairing ---'

# The actual repair should have happened (because of the dirty bit),
# but some cleanup may have failed (like freeing the old reftable)
# because the image was already marked corrupt by that point
_check_test_img -r all

echo
echo "=== Writing to an unaligned preallocated zero cluster ==="
echo

_make_test_img 64M

# Allocate the L2 table
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
# Pretend there is a preallocated zero cluster somewhere inside the
# image header
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# Let's write to it!
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo '--- Repairing ---'
_check_test_img -r all

echo
echo '=== Discarding with an unaligned refblock ==='
echo

_make_test_img 64M

$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Now try to discard something that will be submitted as two requests
# (main part + tail)
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Discarding an out-of-bounds refblock ==="
echo

_make_test_img 64M

# Pretend there's a refblock really up high
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Let's try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a non-covered in-bounds refblock ==="
echo

_make_test_img -o 'refcount_bits=1' 64M

# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the covering refblock has a valid index in the
# reftable)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to point to the third refblock
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'

echo
echo "=== Discarding a refblock covered by an unaligned refblock ==="
echo

_make_test_img -o 'refcount_bits=1' 64M

# Same as above
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock
poke_file "$TEST_IMG" "$((rt_offset + 16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all

echo
echo "=== Testing the QEMU shutdown with a corrupted image ==="
echo
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Drive QEMU over QMP on stdio: negotiate capabilities, issue a write to
# the corrupted image through the human monitor, then quit
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp | _filter_qemu_io

echo
echo "=== Testing incoming inactive corrupted image ==="
echo

_make_test_img 64M
# Create an unaligned L1 entry, so qemu will signal a corruption when
# reading from the covered area
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"

# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive',
                        'driver': 'qcow2',
                        'file': {
                            'driver': 'file',
                            'filename': '$TEST_IMG'
                        }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp | _filter_qemu_io | _filter_io_error

echo
# Image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'

# success, all done
echo "*** done"
# Quoted so the file name is never word-split or glob-expanded before
# being handed to rm
rm -f "$seq.full"
status=0