xref: /openbmc/qemu/tests/qemu-iotests/060 (revision a48f7644)
1#!/bin/bash
2#
3# Test case for image corruption (overlapping data structures) in qcow2
4#
5# Copyright (C) 2013 Red Hat, Inc.
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program.  If not, see <http://www.gnu.org/licenses/>.
19#
20
# creator
owner=mreitz@redhat.com

# Name this test was invoked under.  "$0" is quoted so that a script path
# containing whitespace or glob characters is handed to basename intact.
seq="$(basename "$0")"
echo "QA output created by $seq"

# Working directory at the time the test was started
here="$PWD"
# Exit code used by the exit trap; only overwritten once the test ran through
status=1	# failure is the default!
29
# Cleanup hook run by the trap below.  It only delegates to the
# _cleanup_test_img helper, which is not defined in this file.
_cleanup() {
    _cleanup_test_img
}

# On normal exit as well as on HUP/INT/QUIT/TERM: clean up, then leave with
# whatever $status holds at that moment (the single quotes defer expansion
# until the trap actually fires).
trap '_cleanup; exit $status' EXIT HUP INT QUIT TERM
35
# Pull in the standard environment, filters and checks
. ./common.rc
. ./common.filter

# Only qcow2-specific low-level functionality is tested here
_supported_fmt qcow2
_supported_proto file
_supported_os Linux

# Byte offsets of the metadata structures that get poked at further down
# (refcount table, refcount block, L1 table, L2 table).
# XXX: all of these are just assumptions about the image layout.
rt_offset=$((0x10000))                # 65536
rb_offset=$((0x20000))                # 131072
l1_offset=$((0x30000))                # 196608
l2_offset=$((0x40000))                # 262144
l2_offset_after_snapshot=$((0x80000)) # 524288

IMGOPTS="compat=1.1"

# qemu-io "open" commands used throughout the test.  Overlap checks happen
# before write operations only, so for a read-only open the overlap-check
# option would be irrelevant and is left out.
OPEN_RO="open -r $TEST_IMG"
OPEN_RW="open -o overlap-check=all $TEST_IMG"
57
printf '\n%s\n\n' "=== Testing L2 reference into L1 ==="
_make_test_img 64M
# Make the first L1 entry (i.e. the first L2 table) refer back to the L1
# table itself.  The MSb of the entry is set on purpose so that the refcount
# is taken to be one; otherwise any later write would result in a COW
# operation and ruin this corruption attempt.
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
_check_test_img

# At this point the corrupt bit is not expected to be set
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

# A write attempt forces the corrupt bit to be set ...
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" \
    | _filter_qemu_io

# ... so now it must show up in the header
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

# The same information should be available through qemu-img info
_img_info --format-specific

# Opening the image read/write should fail
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 \
    | _filter_qemu_io \
    | _filter_testdir \
    | _filter_imgfmt

# Opening it read-only should succeed
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" \
    | _filter_qemu_io

# No repair is attempted here: it would probably fail (how should an L2
# table linked onto the L1 table be fixed?)
91
printf '\n%s\n\n' "=== Testing cluster data reference into refcount block ==="
_make_test_img 64M
# Allocate an L2 table: grow the file by one 64k cluster past l2_offset and
# hook that cluster into the first L1 entry
truncate -s "$((l2_offset + 65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark the cluster as used
poke_file "$TEST_IMG" "$((rb_offset + 8))" "\x00\x01"
# Redirect the new data cluster onto the refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
_check_test_img

# Header flags before and after a write that runs into the overlap
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

# Try to fix it
_check_test_img -r all

# The corrupt bit should be cleared again
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

# Write once more to see whether the image is really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features
117
printf '\n%s\n\n' "=== Testing cluster data reference into inactive L2 table ==="
_make_test_img 64M
# Write, snapshot, write again: the snapshot keeps the first state around
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" \
    | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" \
    | _filter_qemu_io
# The inactive L2 table stays at its old offset
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
    "\x80\x00\x00\x00\x00\x04\x00\x00"
_check_test_img

# Header flags before and after a write into the corrupted image
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

# Repair, then show the header flags before and after another write
_check_test_img -r all
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" \
    | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header \
    | grep incompatible_features

# Check data: pattern 4 in the active state, pattern 1 after applying the
# snapshot
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" \
    | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
_check_test_img
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" \
    | _filter_qemu_io
142
printf '\n%s\n\n' "=== Testing overlap while COW is in flight ==="
# The discard below has to actually unallocate the sector rather than turn
# it into a zero sector (we want COW, after all), which requires compat=0.10.
IMGOPTS='compat=0.10' _make_test_img 1G
# Write two clusters; the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" \
    | _filter_qemu_io
# Discard the first cluster.  It will soon enough be reallocated and used
# for COW.
$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" \
    | _filter_qemu_io
# Now corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" '131084' "\x00\x00" # 0x2000c
# qemu-io command sequence: start a write that requires COW and stop it
# right before the read; then trigger the corruption prevention by writing
# to an unallocated cluster, which leads to an attempt to overwrite the
# second L2 table.  Finally resume the COW write and see it fail (but not
# crash).
cow_commands="open -o file.driver=blkdebug $TEST_IMG
break cow_read 0
aio_write 0k 1k
wait_break 0
write 64k 64k
resume 0"
printf '%s\n' "$cow_commands" \
    | $QEMU_IO \
    | _filter_qemu_io
168
printf '\n%s\n\n' "=== Testing unallocated image header ==="
_make_test_img 64M
# A first write creates the L1/L2 tables
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Zero the first two bytes of the refcount block, then write again
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" \
    | _filter_qemu_io
177
printf '\n%s\n\n' "=== Testing unaligned L1 entry ==="
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# The entry gets masked with ~(512 - 1) = ~0x1ff, so the alignment of the
# lower 9 bits does not matter; the misalignment is placed above them.
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

# Same corruption again, this time to see how well zero cluster expansion
# copes with it
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
193
printf '\n%s\n\n' "=== Testing unaligned L2 entry ==="
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Overwrite the first L2 entry with an unaligned cluster offset, then read
# from the area it covers
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
201
printf '\n%s\n\n' "=== Testing unaligned pre-allocated zero cluster ==="
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Same unaligned offset as in the plain L2 entry test, but with the lowest
# bit of the entry set as well
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# Downgrading to compat=0.10 triggers zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
210
printf '\n%s\n\n' "=== Testing unaligned reftable entry ==="
_make_test_img 64M
# Give the first reftable entry an unaligned offset, then attempt a write
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
217
printf '\n%s\n\n' "=== Testing non-fatal corruption on freeing ==="
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Make the first L2 entry unaligned, then discard the cluster it describes
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
225
printf '\n%s\n\n' "=== Testing read-only corruption report ==="
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Both reads go through the same read-only open; only a single error
# message should be emitted
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" \
    | _filter_qemu_io
234
printf '\n%s\n\n' "=== Testing non-fatal and then fatal corruption report ==="
_make_test_img 64M
$QEMU_IO -c "write 0 128k" "$TEST_IMG" \
    | _filter_qemu_io
# Make the first two L2 entries unaligned
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$((l2_offset + 8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# The discard and the read each hit one of the entries; two error messages
# should be emitted
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" \
    | _filter_qemu_io
244
printf '\n%s\n\n' "=== Testing empty refcount table ==="
_make_test_img 64M
# Zero out the first refcount table entry and then try to write
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Repair the image
_check_test_img -r all
253
printf '\n%s\n\n' "=== Testing empty refcount table with valid L1 and L2 tables ==="
_make_test_img 64M
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The first data cluster is already allocated, so this write triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at()),
# which causes a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" \
    | _filter_qemu_io
# Repair the image
_check_test_img -r all
266
printf '\n%s\n\n' "=== Testing empty refcount block ==="
_make_test_img 64M
# Zero the first eight bytes of the refcount block and then try to write
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Repair the image
_check_test_img -r all
275
printf '\n%s\n\n' "=== Testing empty refcount block with compressed write ==="
_make_test_img 64M
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" \
    | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# An L2 table already exists thanks to the previous write, so this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Repair the image
_check_test_img -r all
287
printf '\n%s\n\n' "=== Testing zero refcount table size ==="
_make_test_img 64M
# Zero the four bytes at header offset 56, then try to write; stderr is
# captured as well and run through the directory/format filters
poke_file "$TEST_IMG" "56" "\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 \
    | _filter_testdir \
    | _filter_imgfmt
# Repair the image
_check_test_img -r all
296
printf '\n%s\n\n' "=== Testing incorrect refcount table offset ==="
_make_test_img 64M
# Zero the eight bytes at header offset 48, then try to write
poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
303
printf '\n%s\n\n' "=== Testing dirty corrupt image ==="

_make_test_img 64M

# Make the refblock appear unaligned
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Mark the image dirty, which forces an automatic check when it is opened
poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 \
    | _filter_testdir \
    | _filter_imgfmt

printf '%s\n' '--- Repairing ---'

# Because of the dirty bit the actual repair should already have happened,
# but some cleanup (like freeing the old reftable) may have failed since the
# image was already marked corrupt by that point
_check_test_img -r all
323
printf '\n%s\n\n' "=== Writing to an unaligned preallocated zero cluster ==="

_make_test_img 64M

# Writing and discarding again leaves an allocated L2 table behind
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" \
    | _filter_qemu_io
# Pretend there is a preallocated zero cluster somewhere inside the image
# header
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# ... and write to it
$QEMU_IO -c "write 0 64k" "$TEST_IMG" \
    | _filter_qemu_io

printf '%s\n' '--- Repairing ---'
_check_test_img -r all
340
printf '\n%s\n\n' '=== Discarding with an unaligned refblock ==='

_make_test_img 64M

$QEMU_IO -c "write 0 128k" "$TEST_IMG" \
    | _filter_qemu_io
# Make our refblock unaligned
poke_file "$TEST_IMG" "$((rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Discard a range that will be submitted as two requests (main part + tail).
# The output of this call is intentionally left unfiltered.
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"

printf '%s\n' '--- Repairing ---'
# The first repair fails because the corruption prevents the check function
# from double-checking.  That first invocation uses -q: otherwise the
# double-check error message appears above the summary for some reason, so
# the summary is simply hidden.
_check_test_img -q -r all
_check_test_img -r all
362
printf '\n%s\n\n' "=== Discarding an out-of-bounds refblock ==="

_make_test_img 64M

# Pretend there's a refblock really up high
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Shrink the qcow2 image so that the block driver tries to discard that
# refblock (and see what happens!)
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

printf '%s\n' '--- Checking and retrying ---'
# The image should not have been resized
_img_info \
    | grep 'virtual size'
# It should nevertheless pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So try the shrink once more
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info \
    | grep 'virtual size'
384
printf '\n%s\n\n' "=== Discarding a non-covered in-bounds refblock ==="

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Pretend there's a refblock at a place that no refblock covers (although
# the refblock that would cover it has a valid index in the reftable).
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so pointing to
# 0x10_0000_0000 (64G) points into the range of the third refblock.
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

printf '%s\n' '--- Checking and retrying ---'
# The image should not have been resized
_img_info \
    | grep 'virtual size'
# It should nevertheless pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So try the shrink once more
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info \
    | grep 'virtual size'
408
printf '\n%s\n\n' "=== Discarding a refblock covered by an unaligned refblock ==="

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Second reftable entry: a refblock at 0x10_0000_0000, i.e. in the range
# that a third refblock would have to cover
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# This time that third refblock is actually "created", but unaligned
poke_file "$TEST_IMG" "$((rt_offset + 16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

printf '%s\n' '--- Repairing ---'
# The first repair fails because the corruption prevents the check function
# from double-checking.  That first invocation uses -q: otherwise the
# double-check error message appears above the summary for some reason, so
# the summary is simply hidden.
_check_test_img -q -r all
_check_test_img -r all
429
printf '\n%s\n\n' "=== Testing the QEMU shutdown with a corrupted image ==="
_make_test_img 64M
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# QMP conversation fed to QEMU on stdin: negotiate capabilities, issue a
# write to the drive through the human monitor's qemu-io command, then quit
qmp_commands="{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}"
printf '%s\n' "$qmp_commands" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp \
    | _filter_qemu_io
442
printf '\n%s\n\n' "=== Testing incoming inactive corrupted image ==="

_make_test_img 64M
# Create an unaligned L1 entry, so that qemu signals a corruption when
# reading from the area it covers
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"

# QMP conversation fed to QEMU on stdin: negotiate capabilities, read from
# the drive through the human monitor's qemu-io command, then quit
qmp_commands="{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
      {'execute': 'quit'}"

# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
printf '%s\n' "$qmp_commands" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -blockdev "{'node-name': 'drive',
                        'driver': 'qcow2',
                        'file': {
                            'driver': 'file',
                            'filename': '$TEST_IMG'
                        }}" \
            -incoming exec:'cat /dev/null' \
            2>&1 \
    | _filter_qmp \
    | _filter_qemu_io

printf '\n'
# The image should not have been marked corrupt
_img_info --format-specific \
    | grep 'corrupt:'
472
# success, all done
echo "*** done"
# Drop the per-test ".full" file.  The name is quoted and preceded by "--"
# so that an unusual test name is neither word-split, glob-expanded nor
# mistaken for an option.
rm -f -- "$seq.full"
# Report success through the exit trap, which exits with $status
status=0
477