xref: /openbmc/qemu/tests/qemu-iotests/257 (revision e4ec5ad4)
1#!/usr/bin/env python
2#
3# Test bitmap-sync backups (incremental, differential, and partials)
4#
5# Copyright (c) 2019 John Snow for Red Hat, Inc.
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program.  If not, see <http://www.gnu.org/licenses/>.
19#
20# owner=jsnow@redhat.com
21
22import math
23import os
24
25import iotests
26from iotests import log, qemu_img
27
# Size of the test image, in bytes (64 MiB).
SIZE = 64 << 20
# Dirty-bitmap granularity, in bytes (64 KiB); also the default Pattern size.
GRANULARITY = 64 << 10
30
31
class Pattern:
    """One write: `size` bytes at `offset`, filled with the value `byte`.

    `byte` is kept as given (the groups in this file pass strings such as
    '0x49') and is later substituted into a qemu-io "write -P" command.
    """

    def __init__(self, byte, offset, size=GRANULARITY):
        self.byte, self.offset, self.size = byte, offset, size

    def bits(self, granularity):
        """Return the set of bitmap bit indices covered by this write."""
        first = self.offset // granularity
        # Index of the bit holding the last byte written, made exclusive.
        stop = (self.offset + self.size - 1) // granularity + 1
        return set(range(first, stop))
42
43
class PatternGroup:
    """A batch of Pattern objects; construct it from an iterable of Patterns."""

    def __init__(self, patterns):
        self.patterns = patterns

    def bits(self, granularity):
        """Return the union of the bits dirtied by every pattern in the group."""
        per_pattern = (entry.bits(granularity) for entry in self.patterns)
        return set().union(*per_pattern)
55
56
# Write batches used by the tests, indexed by batch number. Unless a size is
# given, each Pattern covers GRANULARITY bytes. The cluster counts quoted
# below hold for the default 64K granularity.
GROUPS = [
    # Batch 0: 4 clusters
    PatternGroup([
        Pattern(byte='0x49', offset=0x0000000),
        Pattern(byte='0x6c', offset=0x0100000),   # 1M
        Pattern(byte='0x6f', offset=0x2000000),   # 32M
        Pattern(byte='0x76', offset=0x3ff0000),   # 64M - 64K
    ]),
    # Batch 1: 6 clusters (3 new)
    PatternGroup([
        Pattern(byte='0x65', offset=0x0000000),   # Full overwrite
        Pattern(byte='0x77', offset=0x00f8000),   # Partial-left (1M-32K)
        Pattern(byte='0x72', offset=0x2008000),   # Partial-right (32M+32K)
        Pattern(byte='0x69', offset=0x3fe0000),   # Adjacent-left (64M - 128K)
    ]),
    # Batch 2: 7 clusters (3 new)
    PatternGroup([
        Pattern(byte='0x74', offset=0x0010000),   # Adjacent-right
        Pattern(byte='0x69', offset=0x00e8000),   # Partial-left  (1M-96K)
        Pattern(byte='0x6e', offset=0x2018000),   # Partial-right (32M+96K)
        Pattern(byte='0x67', offset=0x3fe0000,    # Overwrite [(64M-128K)-64M)
                size=2 * GRANULARITY),
    ]),
    # Batch 3: 8 clusters (5 new)
    # Carefully chosen such that nothing re-dirties the one cluster
    # that copies out successfully before failure in Group #1.
    PatternGroup([
        Pattern(byte='0xaa', offset=0x0010000,    # Overwrite and 2x Adjacent-right
                size=3 * GRANULARITY),
        Pattern(byte='0xbb', offset=0x00d8000),   # Partial-left (1M-160K)
        Pattern(byte='0xcc', offset=0x2028000),   # Partial-right (32M+160K)
        Pattern(byte='0xdd', offset=0x3fc0000),   # New; leaving a gap to the right
    ]),
]
87
88
class EmulatedBitmap:
    """Python-side model of a dirty bitmap, kept as a set of bit indices."""

    def __init__(self, granularity=GRANULARITY):
        self.granularity = granularity
        self._bits = set()

    def dirty_bits(self, bits):
        """Mark every index in `bits` as dirty."""
        self._bits.update(bits)

    def dirty_group(self, n):
        """Mark every bit touched by write group `n` as dirty."""
        touched = GROUPS[n].bits(self.granularity)
        self.dirty_bits(touched)

    def clear(self):
        """Drop all dirty bits."""
        self._bits = set()

    def clear_bits(self, bits):
        """Mark every index in `bits` as clean."""
        self._bits.difference_update(bits)

    def clear_bit(self, bit):
        """Mark a single index as clean; a no-op if it was not dirty."""
        self._bits.discard(bit)

    def clear_group(self, n):
        """Mark every bit touched by write group `n` as clean."""
        touched = GROUPS[n].bits(self.granularity)
        self.clear_bits(touched)

    @property
    def first_bit(self):
        """Lowest dirty bit index (raises IndexError when nothing is dirty)."""
        ordered = sorted(self.bits)
        return ordered[0]

    @property
    def bits(self):
        """The underlying set of dirty bit indices (not a copy)."""
        return self._bits

    @property
    def count(self):
        """Number of dirty bits."""
        return len(self.bits)

    def compare(self, qmp_bitmap):
        """
        Log a human-readable check that a bitmap reported over QMP has as
        many bits set as this emulated bitmap does.

        :param qmp_bitmap: Mapping with 'count' and 'granularity' entries
                           and an optional 'name'.
        """
        label = qmp_bitmap.get('name', '(anonymous)')
        log("= Checking Bitmap {:s} =".format(label))

        expected = self.count
        # The reported count is divided by the reported granularity to get
        # a number comparable with our bit count.
        reported = qmp_bitmap['count'] // qmp_bitmap['granularity']
        verdict = "OK!" if expected == reported else "ERROR!"

        log("expecting {:d} dirty sectors; have {:d}. {:s}".format(
            expected, reported, verdict))
        log('')
139
140
class Drive:
    """Loose model of a drive attached to a VM.

    Tracks the image path, the VM, the image format, the size and the
    node name."""

    def __init__(self, path, vm=None):
        self.path = path
        self.vm = vm
        # Filled in by img_create() / create_target(); callers may also
        # assign `node` directly.
        self.fmt = self.size = self.node = None

    def img_create(self, fmt, size):
        """Create the image at self.path with qemu-img; record fmt and size."""
        self.fmt, self.size = fmt, size
        iotests.qemu_img_create('-f', fmt, self.path, str(size))

    def create_target(self, name, fmt, size):
        """Create and attach a new image at self.path through QMP.

        A 'file' node named "file_<basename>" is created and added first,
        then a `fmt` node called `name` is created on top of it. Each QMP
        reply is logged.
        """
        vm = self.vm
        file_node_name = "file_{}".format(os.path.basename(self.path))

        # Protocol layer: an empty file, then its node.
        file_options = {
            'driver': 'file',
            'filename': self.path,
            'size': 0,
        }
        log(vm.command('blockdev-create', job_id='bdc-file-job',
                       options=file_options))
        vm.run_job('bdc-file-job')
        log(vm.command('blockdev-add', driver='file',
                       node_name=file_node_name, filename=self.path))

        # Format layer: format the file node, then open it as `name`.
        fmt_options = {
            'driver': fmt,
            'file': file_node_name,
            'size': size,
        }
        log(vm.command('blockdev-create', job_id='bdc-fmt-job',
                       options=fmt_options))
        vm.run_job('bdc-fmt-job')
        log(vm.command('blockdev-add', driver=fmt,
                       node_name=name,
                       file=file_node_name))

        self.fmt, self.size, self.node = fmt, size, name
185
def blockdev_backup(vm, device, target, sync, **kwargs):
    """Issue a logged 'blockdev-backup' command and return its result.

    The filter node is always named 'backup-top'. Extra keyword arguments
    are forwarded, except those whose value is None.
    """
    # Arguments explicitly set to None count as "not supplied".
    supplied = {name: value
                for name, value in kwargs.items()
                if value is not None}
    return vm.qmp_log('blockdev-backup',
                      device=device,
                      target=target,
                      sync=sync,
                      filter_node_name='backup-top',
                      **supplied)
196
def blockdev_backup_mktarget(drive, target_id, filepath, sync, **kwargs):
    """Create a target node at `filepath`, then start a backup of `drive` to it.

    The target gets the same format and size as `drive`. Nothing is returned.
    """
    vm = drive.vm
    target = Drive(filepath, vm=vm)
    target.create_target(target_id, drive.fmt, drive.size)
    blockdev_backup(vm, drive.node, target_id, sync, **kwargs)
201
def reference_backup(drive, n, filepath):
    """Take full reference backup number `n` of `drive` into `filepath`.

    The job is run to completion before returning.
    """
    log("--- Reference Backup #{:d} ---\n".format(n))
    job_id = "ref_backup_{:d}".format(n)
    blockdev_backup_mktarget(drive, "ref_target_{:d}".format(n), filepath,
                             "full", job_id=job_id)
    drive.vm.run_job(job_id, auto_dismiss=True)
    log('')
210
def backup(drive, n, filepath, sync, **kwargs):
    """Start test backup number `n` of `drive` into `filepath`.

    'auto-finalize' defaults to False unless the caller supplies it. The
    job is only started, not run; its id is returned.
    """
    log("--- Test Backup #{:d} ---\n".format(n))
    job_id = "backup_{:d}".format(n)
    if 'auto-finalize' not in kwargs:
        kwargs['auto-finalize'] = False
    blockdev_backup_mktarget(drive, "backup_target_{:d}".format(n), filepath,
                             sync, job_id=job_id, **kwargs)
    return job_id
219
def perform_writes(drive, n, filter_node_name=None):
    """Write pattern group `n` to the drive with qemu-io, logging each step.

    Writes go to `filter_node_name` when given, otherwise to the drive's
    own node. Returns the result of querying the VM's bitmaps afterwards.
    """
    log("--- Write #{:d} ---\n".format(n))
    vm = drive.vm
    for pat in GROUPS[n].patterns:
        node = filter_node_name or drive.node
        cmd = "write -P{:s} 0x{:07x} 0x{:x}".format(
            pat.byte, pat.offset, pat.size)
        log(cmd)
        log(vm.hmp_qemu_io(node, cmd))
    bitmaps = vm.query_bitmaps()
    log({'bitmaps': bitmaps}, indent=2)
    log('')
    return bitmaps
233
234
def compare_images(image, reference, baseimg=None, expected_match=True):
    """
    Print a nice human-readable message comparing these images.

    :param image: Path of the image under test.
    :param reference: Path of the image to compare against.
    :param baseimg: Optional backing file. If given, `image` is first
                    rebased onto it with "qemu-img rebase -u".
    :param expected_match: Whether the two images are expected to be
                           identical; decides between "OK!" and "ERROR!".
    :raises AssertionError: If the rebase command exits non-zero.
    """
    expected_ret = 0 if expected_match else 1
    if baseimg:
        # Keep the command out of an assert statement: asserts are removed
        # under "python -O", which would silently skip the rebase itself.
        rebase_ret = qemu_img("rebase", "-u", "-b", baseimg, image)
        if rebase_ret != 0:
            raise AssertionError(
                'qemu-img rebase of "{}" onto "{}" failed ({})'.format(
                    image, baseimg, rebase_ret))
    ret = qemu_img("compare", image, reference)
    log('qemu_img compare "{:s}" "{:s}" ==> {:s}, {:s}'.format(
        image, reference,
        "Identical" if ret == 0 else "Mismatch",
        "OK!" if ret == expected_ret else "ERROR!"),
        filters=[iotests.filter_testfiles])
248
def test_bitmap_sync(bsync_mode, msync_mode='bitmap', failure=None):
    """
    Test bitmap backup routines.

    Outline of one run:
      0. Write group 0, take a full reference backup, then add "bitmap0".
      1. Write group 1, take a reference backup, then run the test backup
         using `msync_mode`, optionally failing it as per `failure`.
      2. Write group 3, take a reference backup, then run a "bitmap" mode
         backup (in failure modes, this is a recovery).
      3. Remove the bitmap, shut the VM down and compare the images.
    Along the way an EmulatedBitmap is updated in step and compared with
    what the VM reports for "bitmap0".

    :param bsync_mode: Is the Bitmap Sync mode, and can be any of:
        - on-success: This is the "incremental" style mode. Bitmaps are
                      synchronized to what was copied out only on success.
                      (Partial images must be discarded.)
        - never:      This is the "differential" style mode.
                      Bitmaps are never synchronized.
        - always:     This is a "best effort" style mode.
                      Bitmaps are always synchronized, regardless of failure.
                      (Partial images must be kept.)

    :param msync_mode: The mirror sync mode to use for the first backup.
                       Can be any one of:
        - bitmap: Backups based on bitmap manifest.
        - full:   Full backups.
        - top:    Full backups of the top layer only.

    :param failure: Is the (optional) failure mode, and can be any of:
        - None:         No failure. Test the normative path. Default.
        - simulated:    Cancel the job right before it completes.
                        This also tests writes "during" the job.
        - intermediate: This tests a job that fails mid-process and produces
                        an incomplete backup. Testing limitations prevent
                        testing competing writes.
    """
    with iotests.FilePaths(['img', 'bsync1', 'bsync2',
                            'fbackup0', 'fbackup1', 'fbackup2']) as \
                            (img_path, bsync1, bsync2,
                             fbackup0, fbackup1, fbackup2), \
         iotests.VM() as vm:

        # Banner describing this scenario, e.g.
        # "Mode bitmap; Bitmap Sync never with simulated failure".
        mode = "Mode {:s}; Bitmap Sync {:s}".format(msync_mode, bsync_mode)
        preposition = "with" if failure else "without"
        cond = "{:s} {:s}".format(preposition,
                                  "{:s} failure".format(failure) if failure
                                  else "failure")
        log("\n=== {:s} {:s} ===\n".format(mode, cond))

        log('--- Preparing image & VM ---\n')
        drive0 = Drive(img_path, vm=vm)
        drive0.img_create(iotests.imgfmt, SIZE)
        vm.add_device("{},id=scsi0".format(iotests.get_virtio_scsi_device()))
        vm.launch()

        file_config = {
            'driver': 'file',
            'filename': drive0.path
        }

        # For 'intermediate' failures, wrap the file node in blkdebug. The
        # rules below chain together: a flush_to_disk event in state 1 moves
        # to state 2, a read_aio in state 2 moves to state 3, and a read_aio
        # in state 3 fails once with errno 5.
        # (presumably blkdebug starts out in state 1 -- confirm in its docs)
        if failure == 'intermediate':
            file_config = {
                'driver': 'blkdebug',
                'image': file_config,
                'set-state': [{
                    'event': 'flush_to_disk',
                    'state': 1,
                    'new_state': 2
                }, {
                    'event': 'read_aio',
                    'state': 2,
                    'new_state': 3
                }],
                'inject-error': [{
                    'event': 'read_aio',
                    'errno': 5,
                    'state': 3,
                    'immediately': False,
                    'once': True
                }]
            }

        drive0.node = 'drive0'
        vm.qmp_log('blockdev-add',
                   filters=[iotests.filter_qmp_testfiles],
                   node_name=drive0.node,
                   driver=drive0.fmt,
                   file=file_config)
        log('')

        # 0 - Writes and Reference Backup
        perform_writes(drive0, 0)
        reference_backup(drive0, 0, fbackup0)
        log('--- Add Bitmap ---\n')
        vm.qmp_log("block-dirty-bitmap-add", node=drive0.node,
                   name="bitmap0", granularity=GRANULARITY)
        log('')
        # Our model of bitmap0; it starts out with no bits set.
        ebitmap = EmulatedBitmap()

        # 1 - Writes and Reference Backup
        bitmaps = perform_writes(drive0, 1)
        ebitmap.dirty_group(1)
        bitmap = vm.get_bitmap(drive0.node, 'bitmap0', bitmaps=bitmaps)
        ebitmap.compare(bitmap)
        reference_backup(drive0, 1, fbackup1)

        # 1 - Test Backup (w/ Optional induced failure)
        if failure == 'intermediate':
            # Activate blkdebug induced failure for second-to-next read
            log(vm.hmp_qemu_io(drive0.node, 'flush'))
            log('')
        job = backup(drive0, 1, bsync1, msync_mode,
                     bitmap="bitmap0", bitmap_mode=bsync_mode)

        def _callback():
            """Issue writes while the job is open to test bitmap divergence."""
            # Note: when `failure` is 'intermediate', this isn't called.
            log('')
            # Writes go through the backup job's filter node.
            bitmaps = perform_writes(drive0, 2, filter_node_name='backup-top')
            # Named bitmap (static, should be unchanged)
            ebitmap.compare(vm.get_bitmap(drive0.node, 'bitmap0',
                                          bitmaps=bitmaps))
            # Anonymous bitmap (dynamic, shows new writes)
            anonymous = EmulatedBitmap()
            anonymous.dirty_group(2)
            anonymous.compare(vm.get_bitmap(drive0.node, '', recording=True,
                                            bitmaps=bitmaps))

            # Simulate the order in which this will happen:
            # group 1 gets cleared first, then group two gets written.
            if ((bsync_mode == 'on-success' and not failure) or
                (bsync_mode == 'always')):
                ebitmap.clear()
            ebitmap.dirty_group(2)

        # The job is cancelled only for 'simulated' failures.
        vm.run_job(job, auto_dismiss=True, auto_finalize=False,
                   pre_finalize=_callback,
                   cancel=(failure == 'simulated'))
        bitmaps = vm.query_bitmaps()
        log({'bitmaps': bitmaps}, indent=2)
        log('')

        # Adjust the model for a partially completed job whose bitmap was
        # synchronized anyway.
        if bsync_mode == 'always' and failure == 'intermediate':
            # TOP treats anything allocated as dirty, expect to see:
            if msync_mode == 'top':
                ebitmap.dirty_group(0)

            # We manage to copy one sector (one bit) before the error.
            ebitmap.clear_bit(ebitmap.first_bit)

            # Full returns all bits set except what was copied/skipped
            if msync_mode == 'full':
                fail_bit = ebitmap.first_bit
                ebitmap.clear()
                ebitmap.dirty_bits(range(fail_bit, SIZE // GRANULARITY))

        ebitmap.compare(vm.get_bitmap(drive0.node, 'bitmap0', bitmaps=bitmaps))

        # 2 - Writes and Reference Backup
        bitmaps = perform_writes(drive0, 3)
        ebitmap.dirty_group(3)
        ebitmap.compare(vm.get_bitmap(drive0.node, 'bitmap0', bitmaps=bitmaps))
        reference_backup(drive0, 2, fbackup2)

        # 2 - Bitmap Backup (In failure modes, this is a recovery.)
        job = backup(drive0, 2, bsync2, "bitmap",
                     bitmap="bitmap0", bitmap_mode=bsync_mode)
        vm.run_job(job, auto_dismiss=True, auto_finalize=False)
        bitmaps = vm.query_bitmaps()
        log({'bitmaps': bitmaps}, indent=2)
        log('')
        # Every mode except 'never' is expected to leave the bitmap clean
        # after this backup.
        if bsync_mode != 'never':
            ebitmap.clear()
        ebitmap.compare(vm.get_bitmap(drive0.node, 'bitmap0', bitmaps=bitmaps))

        log('--- Cleanup ---\n')
        vm.qmp_log("block-dirty-bitmap-remove",
                   node=drive0.node, name="bitmap0")
        bitmaps = vm.query_bitmaps()
        log({'bitmaps': bitmaps}, indent=2)
        vm.shutdown()
        log('')

        log('--- Verification ---\n')
        # 'simulated' failures will actually all pass here because we canceled
        # while "pending". This is actually undefined behavior,
        # don't rely on this to be true!
        # compare_images() rebases each test backup onto `baseimg` before
        # comparing it with the matching reference backup.
        compare_images(bsync1, fbackup1, baseimg=fbackup0,
                       expected_match=failure != 'intermediate')
        if not failure or bsync_mode == 'always':
            # Always keep the last backup on success or when using 'always'
            base = bsync1
        else:
            base = fbackup0
        compare_images(bsync2, fbackup2, baseimg=base)
        compare_images(img_path, fbackup2)
        log('')
438
def test_backup_api():
    """
    Exercise malformed and prohibited invocations of the backup API.

    For each sync mode, a table lists the bitmap / bitmap-mode combinations
    to try; every combination is sent as a logged blockdev-backup command.
    """
    with iotests.FilePaths(['img', 'bsync1']) as (img_path, backup_path), \
         iotests.VM() as vm:

        log("\n=== API failure tests ===\n")
        log('--- Preparing image & VM ---\n')
        drive0 = Drive(img_path, vm=vm)
        drive0.img_create(iotests.imgfmt, SIZE)
        vm.add_device("{},id=scsi0".format(iotests.get_virtio_scsi_device()))
        vm.launch()

        drive0.node = 'drive0'
        vm.qmp_log('blockdev-add',
                   filters=[iotests.filter_qmp_testfiles],
                   node_name=drive0.node,
                   driver=drive0.fmt,
                   file={'driver': 'file', 'filename': drive0.path})
        log('')

        target0 = Drive(backup_path, vm=vm)
        target0.create_target("backup_target", drive0.fmt, drive0.size)
        log('')

        vm.qmp_log("block-dirty-bitmap-add", node=drive0.node,
                   name="bitmap0", granularity=GRANULARITY)
        log('')

        log('-- Testing invalid QMP commands --\n')

        # Policy lists shared by several table entries (never mutated).
        named_policies = ['on-success', 'always', 'never']
        all_policies = named_policies + [None]

        # error_cases[sync_mode][bitmap] -> bitmap modes to try.
        error_cases = {
            'incremental': {
                None:        all_policies,
                'bitmap404': all_policies,
                'bitmap0':   ['always', 'never'],
            },
            'bitmap': {
                None:        all_policies,
                'bitmap404': all_policies,
                'bitmap0':   [None],
            },
            'full': {
                None:        named_policies,
                'bitmap404': all_policies,
                'bitmap0':   ['never', None],
            },
            'top': {
                None:        named_policies,
                'bitmap404': all_policies,
                'bitmap0':   ['never', None],
            },
            'none': {
                None:        named_policies,
                'bitmap404': all_policies,
                'bitmap0':   all_policies,
            },
        }

        # Dicts, as always, are not stably-ordered prior to 3.7, so the
        # iteration order is spelled out with tuples:
        sync_modes = ('incremental', 'bitmap', 'full', 'top', 'none')
        bitmap_names = (None, 'bitmap404', 'bitmap0')

        for sync_mode in sync_modes:
            log("-- Sync mode {:s} tests --\n".format(sync_mode))
            per_bitmap = error_cases[sync_mode]
            for bitmap in bitmap_names:
                for policy in per_bitmap[bitmap]:
                    blockdev_backup(drive0.vm, drive0.node, "backup_target",
                                    sync_mode, job_id='api_job',
                                    bitmap=bitmap, bitmap_mode=policy)
                    log('')
514
515
def main():
    """Run every bitmap-sync scenario, then the API failure tests."""
    failure_modes = ("simulated", "intermediate", None)

    # "bitmap" sync mode: all three bitmap sync policies.
    for bsync_mode in ("never", "on-success", "always"):
        for failure in failure_modes:
            test_bitmap_sync(bsync_mode, "bitmap", failure)

    # "full" and "top" sync modes: only 'on-success' and 'always' are run.
    for sync_mode in ('full', 'top'):
        for bsync_mode in ('on-success', 'always'):
            for failure in failure_modes:
                test_bitmap_sync(bsync_mode, sync_mode, failure)

    test_backup_api()
527
if __name__ == '__main__':
    # Restrict the test to qcow2 images on the file protocol.
    iotests.script_main(
        main,
        supported_fmts=['qcow2'],
        supported_protocols=['file'],
    )
531