# Test case for image corruption (overlapping data structures) in qcow2
#
# Copyright (C) 2013 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Test preamble: record the owner, print the QA banner and install the
# exit handler.
# NOTE(review): neither $seq nor _cleanup is defined in the visible part of
# this file -- confirm they are set up before this point.
owner=mreitz@redhat.com

echo "QA output created by $seq"

status=1 # failure is the default!

# Run _cleanup and exit with $status on shell exit (0) and on signals
# 1, 2, 3 and 15.  \$status is escaped so that it is expanded when the trap
# fires, not when it is installed.
trap "_cleanup; exit \$status" 0 1 2 3 15
# Sometimes the error line might be dumped before/after an event
# randomly. Mask it out for specific test that may trigger this
# uncertainty for current test for now.
#
# Reads stdin and writes it to stdout with every line containing
# "Input/output error" removed.
# NOTE(review): the function wrapper was missing around the sed call in the
# text reviewed; the name is taken from the pipeline that uses
# _filter_io_error at the end of this script.
_filter_io_error()
{
    sed '/Input\/output error/d'
}
# get standard environment, filters and checks

# This tests qcow2-specific low-level functionality
# File offsets that the tests below poke at: refcount table (rt), refcount
# block (rb), L1 table, L2 table, and the L2 table position used after a
# snapshot has been taken.
rt_offset=65536  # 0x10000 (XXX: just an assumption)
rb_offset=131072 # 0x20000 (XXX: just an assumption)
l1_offset=196608 # 0x30000 (XXX: just an assumption)
l2_offset=262144 # 0x40000 (XXX: just an assumption)
l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)

# qemu-io "open" commands; $TEST_IMG is expanded here, at assignment time.
OPEN_RW="open -o overlap-check=all $TEST_IMG"
# Overlap checks are done before write operations only, therefore opening an
# image read-only makes the overlap-check option irrelevant
OPEN_RO="open -r $TEST_IMG"
echo "=== Testing L2 reference into L1 ==="

# Link first L1 entry (first L2 table) onto itself
# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
# later write will result in a COW operation, effectively ruining this attempt
# on image corruption)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"

# The corrupt bit should not be set anyway
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Try to write something, thereby forcing the corrupt bit to be set
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io

# The corrupt bit must now be set
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# This information should be available through qemu-img info
_img_info --format-specific

# Try to open the image R/W (which should fail)
# NOTE(review): in the text reviewed this pipeline ended in a line
# continuation whose following lines were missing; other stderr-capturing
# pipelines in this script add "| _filter_testdir | _filter_imgfmt" --
# confirm whether the same filters belong here.
$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io

# Try to open it RO (which should succeed)
$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io

# We could now try to fix the image, but this would probably fail (how should an
# L2 table linked onto the L1 table be fixed?)
echo "=== Testing cluster data reference into refcount block ==="

# Size the image file to end 65536 bytes past $l2_offset and point the first
# L1 entry at 0x40000 (the assumed $l2_offset).
truncate -s "$((l2_offset + 65536))" "$TEST_IMG"
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
# Mark cluster as used
poke_file "$TEST_IMG" "$((rb_offset + 8))" "\x00\x01"
# Redirect new data cluster onto refcount block
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"

# Dump the incompatible_features header field before and after a write that
# hits the redirected cluster.
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Repair the image
_check_test_img -r all

# The corrupt bit should be cleared
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Look if it's really really fixed
$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
echo "=== Testing cluster data reference into inactive L2 table ==="

# Write pattern 1, snapshot it as "foo", then overwrite with pattern 2.
$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -c foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
# The inactive L2 table remains at its old offset
poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
                      "\x80\x00\x00\x00\x00\x04\x00\x00"

# Dump incompatible_features around a write to the corrupted image ...
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
# ... then repair and repeat the same sequence with pattern 4.
_check_test_img -r all
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features

# Verify the active data (pattern 4), apply snapshot "foo" and verify that
# the snapshotted data (pattern 1) is back.
$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
$QEMU_IMG snapshot -a foo "$TEST_IMG"
$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
echo "=== Testing overlap while COW is in flight ==="

# compat=0.10 is required in order to make the following discard actually
# unallocate the sector rather than make it a zero sector - we want COW.
IMGOPTS='compat=0.10' _make_test_img 1G
# Write two clusters, the second one enforces creation of an L2 table after
# the first data cluster.
$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
# Discard the first cluster. This cluster will soon enough be reallocated.
$QEMU_IO -c 'discard 0k 64k' "$TEST_IMG" | _filter_qemu_io
# Now, corrupt the image by marking the second L2 table cluster as free.
poke_file "$TEST_IMG" '131084' "\x00\x00" # 0x2000c
# Start a write operation requiring COW on the image stopping it right before
# doing the read; then, trigger the corruption prevention by writing anything to
# any unallocated cluster, leading to an attempt to overwrite the second L2
# table. Finally, resume the COW write and see it fail (but not crash).
# NOTE(review): the qemu-io commands between "open" and "resume 0" (the
# breakpoint setup and the writes described above) were missing from the
# text reviewed and are NOT reproduced here -- restore them from the
# authoritative copy of this test before relying on this section.
echo "open -o file.driver=blkdebug $TEST_IMG
resume 0" | $QEMU_IO | _filter_qemu_io
echo "=== Testing unallocated image header ==="

# Allocate something, zero the first two bytes at $rb_offset, then write
# again so that a new allocation is attempted.
# NOTE(review): presumably those two bytes hold the refcount of the cluster
# containing the image header (per the section title) -- confirm.
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
echo "=== Testing unaligned L1 entry ==="

$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
# aligned or not does not matter
# (the entry written below points at 0x42a00)
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io

# Test how well zero cluster expansion can cope with this
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
echo "=== Testing unaligned L2 entry ==="

# Allocate the first cluster, rewrite its L2 entry to point at 0x52a00 and
# read the cluster back.
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
echo "=== Testing unaligned pre-allocated zero cluster ==="

# Same L2 entry as in the unaligned-L2 case (0x52a00), but with the lowest
# bit of the entry set as well.
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
# zero cluster expansion
$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
echo "=== Testing unaligned reftable entry ==="

# Make the first reftable entry point at 0x22a00, then try to write.
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
echo "=== Testing non-fatal corruption on freeing ==="

# Allocate a cluster, make its L2 entry point at 0x52a00 and discard it.
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
echo "=== Testing read-only corruption report ==="

# Allocate a cluster, make its L2 entry point at 0x52a00, then read it twice
# from a read-only open.
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
# Should only emit a single error message
$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io
echo "=== Testing non-fatal and then fatal corruption report ==="

# Allocate two clusters and rewrite both of their L2 entries (to 0x52a00 and
# 0x62a00); discard the first cluster, then read the second.
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
poke_file "$TEST_IMG" "$((l2_offset + 8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
# Should emit two error messages
$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io
echo "=== Testing empty refcount table ==="

# Zero the first refcount table entry, then attempt a write.
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
echo "=== Testing empty refcount table with valid L1 and L2 tables ==="

# Allocate the first data cluster (and with it the L1/L2 mappings) before
# zeroing the first refcount table entry.
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# Since the first data cluster is already allocated this triggers an
# allocation with an explicit offset (using qcow2_alloc_clusters_at())
# causing a refcount block to be allocated at offset 0
$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
echo "=== Testing empty refcount block ==="

# Zero the first eight bytes at $rb_offset, then attempt a write.
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
echo "=== Testing empty refcount block with compressed write ==="

$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
# Zero the first eight bytes at $rb_offset.
poke_file "$TEST_IMG" "$rb_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
# The previous write already allocated an L2 table, so now this new
# write will try to allocate a compressed data cluster at offset 0.
$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io
# Repair the image
_check_test_img -r all
echo "=== Testing zero refcount table size ==="

# Zero the four bytes at file offset 56.
# NOTE(review): presumably the header's refcount table size field, per the
# section title -- confirm against the qcow2 header layout.
poke_file "$TEST_IMG" "56" "\x00\x00\x00\x00"
# stderr is captured too, so filter test directory and image format names
$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
# Repair the image
_check_test_img -r all
echo "=== Testing incorrect refcount table offset ==="

# Zero the eight bytes at file offset 48.
# NOTE(review): presumably the header's refcount table offset field, per the
# section title -- confirm against the qcow2 header layout.
poke_file "$TEST_IMG" "48" "\x00\x00\x00\x00\x00\x00\x00\x00"
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
echo "=== Testing dirty corrupt image ==="

# Let the refblock appear unaligned
# (the first reftable entry is set to 0xffff2a00)
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\xff\xff\x2a\x00"
# Mark the image dirty, thus forcing an automatic check when opening it
poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
# Open the image (qemu should refuse to do so)
$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt

echo '--- Repairing ---'

# The actual repair should have happened (because of the dirty bit),
# but some cleanup may have failed (like freeing the old reftable)
# because the image was already marked corrupt by that point
_check_test_img -r all
echo "=== Writing to an unaligned preallocated zero cluster ==="

# Allocate the L2 table
$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
# Pretend there is a preallocated zero cluster at the unaligned host offset
# 0x2a00 (the lowest bit of the L2 entry is set as well)
poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
# Let's write to it!
$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io

echo '--- Repairing ---'
_check_test_img -r all
echo '=== Discarding with an unaligned refblock ==='

$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
# Make our refblock unaligned
# (the first reftable entry is set to 0x2a00)
poke_file "$TEST_IMG" "$((rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00"
# Now try to discard something that will be submitted as two requests
# (the output of this invocation is deliberately left unfiltered)
$QEMU_IO -c "discard 0 65537" "$TEST_IMG"

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all
echo "=== Discarding an out-of-bounds refblock ==="

# Pretend there's a refblock really up high
# (second reftable entry set to 0x00ffffff00000000)
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
# Let's try to shrink the qcow2 image so that the block driver tries
# to discard that refblock (and see what happens!)
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
echo "=== Discarding a non-covered in-bounds refblock ==="

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Pretend there's a refblock somewhere where there is no refblock to
# cover it (but the index of the covering refblock is still valid)
# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
# to 0x10_0000_0000 (64G) to point to the third refblock
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Checking and retrying ---'
# Image should not be resized
_img_info | grep 'virtual size'
# But it should pass this check, because the "partial" resize has
# already overwritten refblocks past the end
_check_test_img -r all
# So let's try again
$QEMU_IMG resize --shrink "$TEST_IMG" 32M
_img_info | grep 'virtual size'
echo "=== Discarding a refblock covered by an unaligned refblock ==="

IMGOPTS='refcount_bits=1' _make_test_img 64M

# Point the second reftable entry at 0x10_0000_0000 (64G)
poke_file "$TEST_IMG" "$((rt_offset + 8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
# But now we actually "create" an unaligned third refblock
# (third reftable entry set to 0x200)
poke_file "$TEST_IMG" "$((rt_offset + 16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
$QEMU_IMG resize --shrink "$TEST_IMG" 32M

echo '--- Repairing ---'
# Fails the first repair because the corruption prevents the check
# function from double-checking
# (Using -q for the first invocation, because otherwise the
# double-check error message appears above the summary for some
# reason -- so let's just hide the summary)
_check_test_img -q -r all
_check_test_img -r all
echo "=== Testing the QEMU shutdown with a corrupted image ==="

# Zero the first refcount table entry, then drive a write through the QMP
# human-monitor-command and ask QEMU to quit.
poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
echo "{'execute': 'qmp_capabilities'}
      {'execute': 'human-monitor-command',
       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
      {'execute': 'quit'}" \
    | $QEMU -qmp stdio -nographic -nodefaults \
            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
    | _filter_qmp | _filter_qemu_io
echo "=== Testing incoming inactive corrupted image ==="

# Create an unaligned L1 entry, so qemu will signal a corruption when
# reading from the covered area
# (the first L1 entry is set to 0x2a2a2a2a)
poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"
# Inactive images are effectively read-only images, so this should be a
# non-fatal corruption (which does not modify the image)
# Feed a QMP script (capabilities handshake, a qemu-io "read 0 512" on node
# "drive" via human-monitor-command, then quit) to $QEMU started with
# -incoming, and filter the result.
# NOTE(review): this command is damaged in the text reviewed and is left
# untouched on purpose. Stray numeric tokens (461-474) are embedded in it,
# including inside the quoted QMP script, and the -blockdev JSON argument
# is truncated: the lines between 'node-name' and 'filename', the closing
# braces and the closing double quote are all missing, as is at least one
# line before the final filter pipeline. Restore this command from the
# authoritative copy of the test rather than reconstructing it by hand.
461 echo "{'execute': 'qmp_capabilities'}
462 {'execute': 'human-monitor-command',
463 'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
464 {'execute': 'quit'}" \
465 |
$QEMU -qmp stdio
-nographic -nodefaults \
466 -blockdev "{'node-name': 'drive',
470 'filename': '$TEST_IMG'
472 -incoming exec:'cat /dev/null' \
474 | _filter_qmp | _filter_qemu_io | _filter_io_error
# Image should not have been marked corrupt
_img_info --format-specific | grep 'corrupt:'