xref: /openbmc/qemu/tests/qemu-iotests/060 (revision 438c78da)
1#!/bin/bash
2#
3# Test case for image corruption (overlapping data structures) in qcow2
4#
5# Copyright (C) 2013 Red Hat, Inc.
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program.  If not, see <http://www.gnu.org/licenses/>.
19#
20
# Maintainer of this test case
owner=mreitz@redhat.com

# Name of this test, taken from the script's own file name.  "$0" is quoted
# so that a script path containing whitespace or glob characters reaches
# basename as a single, unmodified argument.
seq="$(basename "$0")"
echo "QA output created by $seq"

here="$PWD"
status=1	# failure is the default!
29
# Cleanup hook for this test: delegates to _cleanup_test_img, which is not
# defined in this file (presumably provided by the sourced test environment).
_cleanup() {
    _cleanup_test_img
}
# Run the cleanup on normal exit and on HUP/INT/QUIT/TERM, then leave with
# whatever value $status holds at the time the trap fires.
trap '_cleanup; exit $status' EXIT HUP INT QUIT TERM
35
# The "Input/output error" line may show up before or after an event in a
# nondeterministic order.  For now, tests that can hit this race pipe their
# output through this filter, which drops every such line from stdin and
# passes all other lines through unchanged.
_filter_io_error() {
    sed -e '\|Input/output error|d'
}
43
# Load the standard test environment, the output filters and the checks
source ./common.rc
source ./common.filter

# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
_supported_proto file
_supported_os Linux
52
# Offsets at which the test pokes image metadata
# (XXX: every one of these is just an assumption)
rt_offset=$((0x10000))                # 65536
rb_offset=$((0x20000))                # 131072
l1_offset=$((0x30000))                # 196608
l2_offset=$((0x40000))                # 262144
l2_offset_after_snapshot=$((0x80000)) # 524288

IMGOPTS='compat=1.1'

# qemu-io "open" command lines shared by the test sections below
OPEN_RW="open -o overlap-check=all ${TEST_IMG}"
# Overlap checks only happen before write operations, so the overlap-check
# option is irrelevant when the image is opened read-only
OPEN_RO="open -r ${TEST_IMG}"
65
printf '\n%s\n\n' "=== Testing L2 reference into L1 ==="
_make_test_img 64M
# Make the first L1 entry (i.e. the first L2 table) point back at the L1
# table itself.  The MSb of the entry is set so that the refcount is taken to
# be one; otherwise any later write would result in a COW operation and ruin
# this attempt at corrupting the image.
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# At this point the corrupt bit should not be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Writing something forces the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# Now the corrupt bit must be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# qemu-img info should make this information available as well
_img_info --format-specific

# Opening the image read-write should fail
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 \
    | _filter_qemu_io \
    | _filter_testdir \
    | _filter_imgfmt

# Opening it read-only should succeed
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# No repair is attempted here: it would probably fail (how should an L2 table
# that is linked onto the L1 table be fixed?)
99
printf '\n%s\n\n' "=== Testing cluster data reference into refcount block ==="
_make_test_img 64M
# Allocate the L2 table: grow the file by one 64k cluster past l2_offset and
# hook that cluster into the first L1 entry
truncate -s "$((l2_offset + 65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark the cluster as used
poke_file "$TEST_IMG" "$((rb_offset + 8))" "\x00\x01"
# Redirect the new data cluster onto the refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img
# Show incompatible_features before and after a write to the bad cluster
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Attempt a repair
_check_test_img -r all

# The repair should have cleared the corrupt bit
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Write once more to see whether the image is really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
125
printf '\n%s\n\n' "=== Testing cluster data reference into inactive L2 table ==="
_make_test_img 64M
# Write pattern 1, take snapshot "foo", then overwrite with pattern 2
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
# The inactive L2 table stays at its old offset
poke_file "$TEST_IMG" \
    "$l2_offset_after_snapshot" \
    "\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img
# Show incompatible_features around a write, a repair and another write
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
_check_test_img -r all
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Verify the data: pattern 4 in the active state, pattern 1 once snapshot
# "foo" has been applied again
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
150
printf '\n%s\n\n' "=== Testing overlap while COW is in flight ==="
# compat=0.10 is needed so that the discard below really unallocates the
# sector instead of turning it into a zero sector - COW is what we are after.
IMGOPTS='compat=0.10' _make_test_img 1G
# Write two clusters; the second one enforces the creation of an L2 table
# after the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Discard the first cluster.  It will soon enough be reallocated and used
# for COW.
$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" | _filter_qemu_io
# Corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" 131084 "\x00\x00" # 0x2000c
# Start a write that requires COW and stop it right before it does the read.
# Then trigger the corruption prevention by writing anything to any
# unallocated cluster, which leads to an attempt to overwrite the second L2
# table.  Finally resume the COW write and watch it fail (but not crash).
# The commands are fed to qemu-io through a pipe, one per line.
printf '%s\n' \
    "open -o file.driver=blkdebug $TEST_IMG" \
    'break cow_read 0' \
    'aio_write 0k 1k' \
    'wait_break 0' \
    'write 64k 64k' \
    'resume 0' \
    | $QEMU_IO \
    | _filter_qemu_io
176
printf '\n%s\n\n' "=== Testing unallocated image header ==="
_make_test_img 64M
# A first write creates L1/L2
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Zero the first two bytes of the refcount block, then write again
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
185
printf '\n%s\n\n' "=== Testing unaligned L1 entry ==="
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# The entry gets masked with ~(512 - 1) = ~0x1ff, so it does not matter
# whether the lower 9 bits are aligned or not
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

# Same corruption again, this time to see how well zero cluster expansion
# copes with it
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
201
printf '\n%s\n\n' "=== Testing unaligned L2 entry ==="
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Overwrite the first L2 entry with an unaligned offset, then read from it
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

printf '\n%s\n\n' "=== Testing unaligned pre-allocated zero cluster ==="
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# Amending to compat=0.10 performs zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
218
printf '\n%s\n\n' "=== Testing unaligned reftable entry ==="
_make_test_img 64M
# Overwrite the first reftable entry with an unaligned offset, then write
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

printf '\n%s\n\n' "=== Testing non-fatal corruption on freeing ==="
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Make the first L2 entry unaligned and discard the cluster it describes
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
233
printf '\n%s\n\n' "=== Testing read-only corruption report ==="
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Two reads, but only a single error message should be emitted
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io

printf '\n%s\n\n' "=== Testing non-fatal and then fatal corruption report ==="
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make the first two L2 entries unaligned
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$((l2_offset + 8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Two error messages should be emitted
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io
252
printf '\n%s\n\n' "=== Testing empty refcount table ==="
_make_test_img 64M
# Zero the first reftable entry, then write
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

printf '\n%s\n\n' "=== Testing empty refcount table with valid L1 and L2 tables ==="
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The first data cluster is already allocated, so this write triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at()),
# which causes a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
274
printf '\n%s\n\n' "=== Testing empty refcount block ==="
_make_test_img 64M
# Zero the first eight bytes of the refcount block, then write
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all

printf '\n%s\n\n' "=== Testing empty refcount block with compressed write ==="
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# An L2 table was already allocated by the previous write, so this new write
# will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
295
printf '\n%s\n\n' "=== Testing zero refcount table size ==="
_make_test_img 64M
# Zero four bytes at offset 56 of the image, then write
poke_file "$TEST_IMG" 56 "\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 \
    | _filter_testdir \
    | _filter_imgfmt
# Repair the image
_check_test_img -r all

printf '\n%s\n\n' "=== Testing incorrect refcount table offset ==="
_make_test_img 64M
# Zero eight bytes at offset 48 of the image, then write
poke_file "$TEST_IMG" 48 "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
311
printf '\n%s\n\n' "=== Testing dirty corrupt image ==="

_make_test_img 64M

# Make the refblock look unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Mark the image dirty, which forces an automatic check when it is opened
poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 \
    | _filter_testdir \
    | _filter_imgfmt

echo '--- Repairing ---'

# Because of the dirty bit the actual repair should already have happened,
# but some cleanup (like freeing the old reftable) may have failed since the
# image was already marked corrupt by that point
_check_test_img -r all
331
printf '\n%s\n\n' "=== Writing to an unaligned preallocated zero cluster ==="

_make_test_img 64M

# Allocate the L2 table
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
# Pretend that a preallocated zero cluster lives somewhere inside the image
# header
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# Now write to it
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo '--- Repairing ---'
_check_test_img -r all
348
printf '\n%s\n\n' '=== Discarding with an unaligned refblock ==='

_make_test_img 64M

$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Discard a range that will be submitted as two requests (main part + tail).
# The output of this command is deliberately left unfiltered, as before.
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"

echo '--- Repairing ---'
# The first repair fails because the corruption keeps the check function
# from double-checking.
# (The first invocation uses -q because otherwise the double-check error
#  message appears above the summary for some reason -- so the summary is
#  simply hidden)
_check_test_img -q -r all
_check_test_img -r all
370
printf '\n%s\n\n' "=== Discarding an out-of-bounds refblock ==="

_make_test_img 64M

# Pretend there is a refblock located really high up
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Shrink the qcow2 image so that the block driver tries to discard that
# refblock (and see what happens!)
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# The image should not have been resized
_img_info | grep 'virtual size'
# It should still pass this check, because the "partial" resize has already
# overwritten refblocks past the end
_check_test_img -r all
# So try the resize once more
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
392
printf '\n%s\n\n' "=== Discarding a non-covered in-bounds refblock ==="

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Pretend there is a refblock at a location that no refblock covers (while
# the covering refblock does have a valid index in the reftable).
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so pointing at the third
# refblock means pointing to 0x10_0000_0000 (64G).
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# The image should not have been resized
_img_info | grep 'virtual size'
# It should still pass this check, because the "partial" resize has already
# overwritten refblocks past the end
_check_test_img -r all
# So try the resize once more
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
416
printf '\n%s\n\n' "=== Discarding a refblock covered by an unaligned refblock ==="

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Second reftable entry: a refblock at 0x10_0000_0000, which would be covered
# by the third refblock
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# This time an unaligned third refblock is actually "created"
poke_file "$TEST_IMG" "$((rt_offset + 16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# The first repair fails because the corruption keeps the check function
# from double-checking.
# (The first invocation uses -q because otherwise the double-check error
#  message appears above the summary for some reason -- so the summary is
#  simply hidden)
_check_test_img -q -r all
_check_test_img -r all
437
printf '\n%s\n\n' "=== Testing the QEMU shutdown with a corrupted image ==="
_make_test_img 64M
# Zero the first reftable entry
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Over QMP on stdio: issue a qemu-io write on the drive, then quit
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp \
    | _filter_qemu_io
450
printf '\n%s\n\n' "=== Testing incoming inactive corrupted image ==="

_make_test_img 64M
# Create an unaligned L1 entry so that qemu signals a corruption when it
# reads from the area this entry covers
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"

# An inactive image is effectively a read-only image, so this should be a
# non-fatal corruption (one that does not modify the image).  The I/O error
# line is dropped from the output by _filter_io_error.
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive',
                        'driver': 'qcow2',
                        'file': {
                            'driver': 'file',
                            'filename': '$TEST_IMG'
                        }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp \
    | _filter_qemu_io \
    | _filter_io_error

echo
# The image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'
480
# Success, all done: announce completion, remove this test's .full file
# (quoted so the name is passed to rm as one literal argument) and record
# success in status.
echo "*** done"
rm -f "$seq.full"
status=0
485