xref: /openbmc/qemu/tests/qemu-iotests/257 (revision 7678b74a)
1#!/usr/bin/env python
2#
3# Test bitmap-sync backups (incremental, differential, and partials)
4#
5# Copyright (c) 2019 John Snow for Red Hat, Inc.
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program.  If not, see <http://www.gnu.org/licenses/>.
19#
20# owner=jsnow@redhat.com
21
22import math
23import os
24
25import iotests
26from iotests import log, qemu_img
27
# Virtual size of the image under test: 64 MiB.
SIZE = 64 << 20
# Dirty bitmap granularity, and the default length of a Pattern: 64 KiB.
GRANULARITY = 64 << 10
30
31
class Pattern:
    """
    One write: `size` bytes of the fill value `byte`, starting at `offset`.

    `byte` is kept as given (the groups below pass strings such as '0x49')
    and is later interpolated into a qemu-io "write -P" command.
    """

    def __init__(self, byte, offset, size=GRANULARITY):
        self.byte, self.offset, self.size = byte, offset, size

    def bits(self, granularity):
        """
        Return the set of bitmap bit indices covered by this write.

        :param granularity: Number of bytes represented by one bit.
        """
        end = self.offset + self.size
        first = self.offset // granularity
        # `end` is exclusive, so the last byte touched is end - 1.
        last = (end - 1) // granularity
        return set(range(first, last + 1))
42
43
class PatternGroup:
    """Grouping of Pattern objects. Initialize with an iterable of Patterns."""
    def __init__(self, patterns):
        # Materialize the iterable: the group is traversed more than once
        # (bits() may be called repeatedly and callers loop over .patterns),
        # which would silently yield nothing on the second pass if a
        # one-shot iterator such as a generator had been stored as-is.
        self.patterns = list(patterns)

    def bits(self, granularity):
        """
        Calculate the unique bits dirtied by this pattern grouping.

        :param granularity: Number of bytes represented by one bit.
        :return: A new set holding the union of every member's bits();
                 empty if the group has no patterns.
        """
        return set().union(*(pattern.bits(granularity)
                             for pattern in self.patterns))
55
56
# Write batches used by the tests; perform_writes(drive, n) issues GROUPS[n]
# and EmulatedBitmap.dirty_group(n) models the bits it dirties.
# Every Pattern is GRANULARITY (64K) bytes long unless a size is given.
# The cluster counts quoted below are for a 64K granularity; "new" counts
# the clusters not already touched by any earlier batch.
GROUPS = [
    PatternGroup([
        # Batch 0: 4 clusters
        Pattern('0x49', 0x0000000),
        Pattern('0x6c', 0x0100000),   # 1M
        Pattern('0x6f', 0x2000000),   # 32M
        Pattern('0x76', 0x3ff0000)]), # 64M - 64K
    PatternGroup([
        # Batch 1: 6 clusters (3 new)
        Pattern('0x65', 0x0000000),   # Full overwrite
        Pattern('0x77', 0x00f8000),   # Partial-left (1M-32K)
        Pattern('0x72', 0x2008000),   # Partial-right (32M+32K)
        Pattern('0x69', 0x3fe0000)]), # Adjacent-left (64M - 128K)
    PatternGroup([
        # Batch 2: 7 clusters (3 new)
        Pattern('0x74', 0x0010000),   # Adjacent-right
        Pattern('0x69', 0x00e8000),   # Partial-left  (1M-96K)
        Pattern('0x6e', 0x2018000),   # Partial-right (32M+96K)
        Pattern('0x67', 0x3fe0000,
                2*GRANULARITY)]),     # Overwrite [(64M-128K)-64M)
    PatternGroup([
        # Batch 3: 8 clusters (5 new)
        # Carefully chosen such that nothing re-dirties the one cluster
        # that copies out successfully before failure in Group #1.
        Pattern('0xaa', 0x0010000,
                3*GRANULARITY),       # Overwrite and 2x Adjacent-right
        Pattern('0xbb', 0x00d8000),   # Partial-left (1M-160K)
        Pattern('0xcc', 0x2028000),   # Partial-right (32M+160K)
        Pattern('0xdd', 0x3fc0000)]), # New; leaving a gap to the right
]
87
88
class EmulatedBitmap:
    """
    Python-side model of a dirty bitmap, kept as a set of dirty bit indices.

    The tests update it in step with the writes and backups they perform and
    then compare() it against what QMP reports.
    """

    def __init__(self, granularity=GRANULARITY):
        self._bits = set()
        self.granularity = granularity

    def dirty_bits(self, bits):
        """Mark every index in the iterable `bits` as dirty."""
        self._bits.update(bits)

    def dirty_group(self, n):
        """Mark everything written by GROUPS[n] as dirty."""
        touched = GROUPS[n].bits(self.granularity)
        self.dirty_bits(touched)

    def clear(self):
        """Forget all dirty bits."""
        self._bits = set()

    def clear_bits(self, bits):
        """Mark every index in the iterable `bits` as clean."""
        self._bits.difference_update(bits)

    def clear_bit(self, bit):
        """Mark the single index `bit` as clean."""
        self.clear_bits({bit})

    def clear_group(self, n):
        """Mark everything written by GROUPS[n] as clean."""
        touched = GROUPS[n].bits(self.granularity)
        self.clear_bits(touched)

    @property
    def first_bit(self):
        """Lowest dirty bit index; raises IndexError if nothing is dirty."""
        ordered = sorted(self.bits)
        return ordered[0]

    @property
    def bits(self):
        """The underlying set of dirty bit indices (not a copy)."""
        return self._bits

    @property
    def count(self):
        """Number of dirty bits."""
        return len(self.bits)

    def compare(self, qmp_bitmap):
        """
        Log a human-readable check that a bitmap, as reported by the QMP
        interface, has as many bits set as this emulated bitmap does.

        :param qmp_bitmap: Bitmap description dict; 'count' and
                           'granularity' are read from it (the reported
                           granularity is used, not self.granularity) and
                           'name' is optional.
        """
        label = qmp_bitmap.get('name', '(anonymous)')
        log("= Checking Bitmap {:s} =".format(label))

        expected = self.count
        reported = qmp_bitmap['count'] // qmp_bitmap['granularity']
        if expected == reported:
            verdict = "OK!"
        else:
            verdict = "ERROR!"

        log("expecting {:d} dirty sectors; have {:d}. {:s}".format(
            expected, reported, verdict))
        log('')
139
140
class Drive:
    """
    Loose description of a drive attached to a VM: where its image lives,
    which VM it belongs to, and its format, size, node and device names.
    """

    def __init__(self, path, vm=None):
        self.path = path
        self.vm = vm
        # Filled in later by img_create() / create_target() or by the caller.
        self.fmt = self.size = None
        self.node = self.device = None

    @property
    def name(self):
        """The node name if one is set, otherwise the device name."""
        if self.node:
            return self.node
        return self.device

    def img_create(self, fmt, size):
        """Create the image at self.path and remember its format and size."""
        self.fmt, self.size = fmt, size
        iotests.qemu_img_create('-f', fmt, self.path, str(size))

    def create_target(self, name, fmt, size):
        """
        Create and attach an image at self.path through the VM.

        Two blockdev-create jobs are run (protocol layer, then format
        layer), each followed by the matching blockdev-add. Every QMP
        result is logged. On return the format node is called `name`.
        """
        vm = self.vm
        file_node_name = "file_{}".format(os.path.basename(self.path))

        # Protocol layer: an empty file, attached as file_<basename>.
        protocol_options = {
            'driver': 'file',
            'filename': self.path,
            'size': 0,
        }
        log(vm.command('blockdev-create', job_id='bdc-file-job',
                       options=protocol_options))
        vm.run_job('bdc-file-job')
        log(vm.command('blockdev-add', driver='file',
                       node_name=file_node_name, filename=self.path))

        # Format layer on top of the protocol node.
        format_options = {
            'driver': fmt,
            'file': file_node_name,
            'size': size,
        }
        log(vm.command('blockdev-create', job_id='bdc-fmt-job',
                       options=format_options))
        vm.run_job('bdc-fmt-job')
        log(vm.command('blockdev-add', driver=fmt,
                       node_name=name,
                       file=file_node_name))

        self.fmt, self.size, self.node = fmt, size, name
190
def query_bitmaps(vm):
    """
    Run query-block and collect each entry's dirty bitmaps.

    :return: {"bitmaps": {key: [bitmap, ...]}} where key is the entry's
             'device' value or, if that is empty, its 'qdev' value.
             Entries without 'dirty-bitmaps' map to an empty list.
    """
    reply = vm.qmp("query-block")
    per_device = {}
    for entry in reply['return']:
        key = entry['device'] or entry['qdev']
        per_device[key] = entry.get('dirty-bitmaps', [])
    return {"bitmaps": per_device}
196
def get_bitmap(bitmaps, drivename, name, recording=None):
    """
    Pick one bitmap out of the object returned by query_bitmaps.

    :param bitmaps: Result of query_bitmaps().
    :param drivename: Key to look under in bitmaps['bitmaps'].
    :param name: Bitmap name to match; a bitmap without a 'name' key
                 counts as being named ''.
    :param recording: If specified, filter results by the specified value.
    :return: The first matching bitmap, or None if there is none.
    """
    for candidate in bitmaps['bitmaps'][drivename]:
        if candidate.get('name', '') != name:
            continue
        if recording is None or candidate.get('recording') == recording:
            return candidate
    return None
209
def blockdev_backup(vm, device, target, sync, **kwargs):
    """
    Issue a logged blockdev-backup command and return its result.

    Keyword arguments whose value is None are dropped before sending, so
    callers can pass "unset" options explicitly.
    """
    supplied = dict((key, val) for key, val in kwargs.items()
                    if val is not None)
    return vm.qmp_log('blockdev-backup',
                      device=device,
                      target=target,
                      sync=sync,
                      **supplied)
219
def blockdev_backup_mktarget(drive, target_id, filepath, sync, **kwargs):
    """
    Create a backup target at `filepath` with the same format and size as
    `drive`, attach it as node `target_id`, then start a blockdev-backup
    from `drive` to it. Extra keyword arguments go to blockdev_backup().
    """
    vm = drive.vm
    Drive(filepath, vm=vm).create_target(target_id, drive.fmt, drive.size)
    blockdev_backup(vm, drive.name, target_id, sync, **kwargs)
224
def reference_backup(drive, n, filepath):
    """
    Take reference backup number `n`: a "full" sync of `drive` into a new
    target at `filepath`, run to completion and dismissed.
    """
    log("--- Reference Backup #{:d} ---\n".format(n))
    job_id = "ref_backup_{:d}".format(n)
    blockdev_backup_mktarget(drive, "ref_target_{:d}".format(n), filepath,
                             "full", job_id=job_id)
    drive.vm.run_job(job_id, auto_dismiss=True)
    log('')
233
def backup(drive, n, filepath, sync, **kwargs):
    """
    Start test backup number `n` of `drive` into a new target at `filepath`.

    The job is only started, not run; 'auto-finalize' defaults to False
    unless the caller supplies it.

    :return: The job id, "backup_<n>".
    """
    log("--- Test Backup #{:d} ---\n".format(n))
    job_id = "backup_{:d}".format(n)
    if 'auto-finalize' not in kwargs:
        kwargs['auto-finalize'] = False
    blockdev_backup_mktarget(drive, "backup_target_{:d}".format(n), filepath,
                             sync, job_id=job_id, **kwargs)
    return job_id
242
def perform_writes(drive, n):
    """
    Issue every write in GROUPS[n] to `drive` through HMP qemu-io, logging
    each command and its output, then query, log and return the bitmaps.
    """
    log("--- Write #{:d} ---\n".format(n))
    template = "write -P{:s} 0x{:07x} 0x{:x}"
    for pat in GROUPS[n].patterns:
        command = template.format(pat.byte, pat.offset, pat.size)
        log(command)
        log(drive.vm.hmp_qemu_io(drive.name, command))
    snapshot = query_bitmaps(drive.vm)
    log(snapshot, indent=2)
    log('')
    return snapshot
256
257
def compare_images(image, reference, baseimg=None, expected_match=True):
    """
    Print a nice human-readable message comparing these images.

    :param image: Path of the image under test.
    :param reference: Path of the image it is compared against.
    :param baseimg: If given, `image` is first rebased onto this path with
                    "qemu-img rebase -u -b".
    :param expected_match: Whether the images are expected to be identical.
                           The logged verdict is "OK!" when the compare
                           return code is 0 (match expected) or 1 (mismatch
                           expected), and "ERROR!" otherwise.
    :raises AssertionError: If the rebase returns a nonzero code.
    """
    expected_ret = 0 if expected_match else 1
    if baseimg:
        # The rebase must not live inside an `assert` statement: asserts are
        # compiled out under `python -O`, which would silently skip the
        # rebase itself and leave the comparison running on the wrong chain.
        rebase_ret = qemu_img("rebase", "-u", "-b", baseimg, image)
        if rebase_ret != 0:
            raise AssertionError(
                'qemu_img rebase of "{}" onto "{}" returned {}'.format(
                    image, baseimg, rebase_ret))
    ret = qemu_img("compare", image, reference)
    log('qemu_img compare "{:s}" "{:s}" ==> {:s}, {:s}'.format(
        image, reference,
        "Identical" if ret == 0 else "Mismatch",
        "OK!" if ret == expected_ret else "ERROR!"),
        filters=[iotests.filter_testfiles])
271
def test_bitmap_sync(bsync_mode, msync_mode='bitmap', failure=None):
    """
    Test bitmap backup routines.

    The sequence is: write group 0 and take reference backup 0; add
    bitmap0; write group 1 and take reference backup 1; run test backup 1
    (optionally failing; group 2 is written from the pre-finalize callback);
    write group 3 and take reference backup 2; run test backup 2 in "bitmap"
    mode; remove the bitmap, shut the VM down and compare the images.
    After each step the bitmap reported over QMP is checked against an
    EmulatedBitmap that models what is expected. Everything is logged.

    :param bsync_mode: Is the Bitmap Sync mode, and can be any of:
        - on-success: This is the "incremental" style mode. Bitmaps are
                      synchronized to what was copied out only on success.
                      (Partial images must be discarded.)
        - never:      This is the "differential" style mode.
                      Bitmaps are never synchronized.
        - always:     This is a "best effort" style mode.
                      Bitmaps are always synchronized, regardless of failure.
                      (Partial images must be kept.)

    :param msync_mode: The mirror sync mode to use for the first backup.
                       Can be any one of:
        - bitmap: Backups based on bitmap manifest.
        - full:   Full backups.
        - top:    Full backups of the top layer only.

    :param failure: Is the (optional) failure mode, and can be any of:
        - None:         No failure. Test the normative path. Default.
        - simulated:    Cancel the job right before it completes.
                        This also tests writes "during" the job.
        - intermediate: This tests a job that fails mid-process and produces
                        an incomplete backup. Testing limitations prevent
                        testing competing writes.
    """
    with iotests.FilePaths(['img', 'bsync1', 'bsync2',
                            'fbackup0', 'fbackup1', 'fbackup2']) as \
                            (img_path, bsync1, bsync2,
                             fbackup0, fbackup1, fbackup2), \
         iotests.VM() as vm:

        # Banner, e.g. "Mode bitmap; Bitmap Sync never with simulated failure"
        # or "... without failure".
        mode = "Mode {:s}; Bitmap Sync {:s}".format(msync_mode, bsync_mode)
        preposition = "with" if failure else "without"
        cond = "{:s} {:s}".format(preposition,
                                  "{:s} failure".format(failure) if failure
                                  else "failure")
        log("\n=== {:s} {:s} ===\n".format(mode, cond))

        log('--- Preparing image & VM ---\n')
        drive0 = Drive(img_path, vm=vm)
        drive0.img_create(iotests.imgfmt, SIZE)
        vm.add_device("{},id=scsi0".format(iotests.get_virtio_scsi_device()))
        vm.launch()

        file_config = {
            'driver': 'file',
            'filename': drive0.path
        }

        if failure == 'intermediate':
            # Wrap the file node in blkdebug. The rules below read as:
            # a flush_to_disk in state 1 moves to state 2, a read_aio in
            # state 2 moves to state 3, and in state 3 one read_aio (once)
            # fails with errno 5.
            # NOTE(review): presumably blkdebug starts in state 1 and errno 5
            # is EIO - confirm against the blkdebug documentation.
            file_config = {
                'driver': 'blkdebug',
                'image': file_config,
                'set-state': [{
                    'event': 'flush_to_disk',
                    'state': 1,
                    'new_state': 2
                }, {
                    'event': 'read_aio',
                    'state': 2,
                    'new_state': 3
                }],
                'inject-error': [{
                    'event': 'read_aio',
                    'errno': 5,
                    'state': 3,
                    'immediately': False,
                    'once': True
                }]
            }

        vm.qmp_log('blockdev-add',
                   filters=[iotests.filter_qmp_testfiles],
                   node_name="drive0",
                   driver=drive0.fmt,
                   file=file_config)
        # drive0.name resolves to the node name ('drive0') from here on;
        # the device id is what query_bitmaps() keys its result by.
        drive0.node = 'drive0'
        drive0.device = 'device0'
        # Use share-rw to allow writes directly to the node;
        # The anonymous block-backend for this configuration prevents us
        # from using HMP's qemu-io commands to address the device.
        vm.qmp_log("device_add", id=drive0.device,
                   drive=drive0.name, driver="scsi-hd",
                   share_rw=True)
        log('')

        # 0 - Writes and Reference Backup
        perform_writes(drive0, 0)
        reference_backup(drive0, 0, fbackup0)
        log('--- Add Bitmap ---\n')
        vm.qmp_log("block-dirty-bitmap-add", node=drive0.name,
                   name="bitmap0", granularity=GRANULARITY)
        log('')
        # Model of bitmap0; starts empty because the bitmap was added after
        # the group 0 writes.
        ebitmap = EmulatedBitmap()

        # 1 - Writes and Reference Backup
        bitmaps = perform_writes(drive0, 1)
        ebitmap.dirty_group(1)
        bitmap = get_bitmap(bitmaps, drive0.device, 'bitmap0')
        ebitmap.compare(bitmap)
        reference_backup(drive0, 1, fbackup1)

        # 1 - Test Backup (w/ Optional induced failure)
        if failure == 'intermediate':
            # Activate blkdebug induced failure for second-to-next read
            log(vm.hmp_qemu_io(drive0.name, 'flush'))
            log('')
        job = backup(drive0, 1, bsync1, msync_mode,
                     bitmap="bitmap0", bitmap_mode=bsync_mode)

        def _callback():
            """Issue writes while the job is open to test bitmap divergence."""
            # Note: when `failure` is 'intermediate', this isn't called.
            log('')
            bitmaps = perform_writes(drive0, 2)
            # Named bitmap (static, should be unchanged)
            ebitmap.compare(get_bitmap(bitmaps, drive0.device, 'bitmap0'))
            # Anonymous bitmap (dynamic, shows new writes)
            anonymous = EmulatedBitmap()
            anonymous.dirty_group(2)
            anonymous.compare(get_bitmap(bitmaps, drive0.device, '',
                                         recording=True))

            # Simulate the order in which this will happen:
            # group 1 gets cleared first, then group two gets written.
            if ((bsync_mode == 'on-success' and not failure) or
                (bsync_mode == 'always')):
                ebitmap.clear()
            ebitmap.dirty_group(2)

        # _callback is handed to run_job as its pre_finalize hook; the job
        # is cancelled instead of finalized for 'simulated' failures.
        vm.run_job(job, auto_dismiss=True, auto_finalize=False,
                   pre_finalize=_callback,
                   cancel=(failure == 'simulated'))
        bitmaps = query_bitmaps(vm)
        log(bitmaps, indent=2)
        log('')

        # _callback did not run in the 'intermediate' case, so for 'always'
        # the model has to be adjusted here for the partial copy.
        if bsync_mode == 'always' and failure == 'intermediate':
            # TOP treats anything allocated as dirty, expect to see:
            if msync_mode == 'top':
                ebitmap.dirty_group(0)

            # We manage to copy one sector (one bit) before the error.
            ebitmap.clear_bit(ebitmap.first_bit)

            # Full returns all bits set except what was copied/skipped
            if msync_mode == 'full':
                fail_bit = ebitmap.first_bit
                ebitmap.clear()
                ebitmap.dirty_bits(range(fail_bit, SIZE // GRANULARITY))

        ebitmap.compare(get_bitmap(bitmaps, drive0.device, 'bitmap0'))

        # 2 - Writes and Reference Backup
        bitmaps = perform_writes(drive0, 3)
        ebitmap.dirty_group(3)
        ebitmap.compare(get_bitmap(bitmaps, drive0.device, 'bitmap0'))
        reference_backup(drive0, 2, fbackup2)

        # 2 - Bitmap Backup (In failure modes, this is a recovery.)
        job = backup(drive0, 2, bsync2, "bitmap",
                     bitmap="bitmap0", bitmap_mode=bsync_mode)
        vm.run_job(job, auto_dismiss=True, auto_finalize=False)
        bitmaps = query_bitmaps(vm)
        log(bitmaps, indent=2)
        log('')
        # This backup is not made to fail, so every mode except 'never' is
        # expected to leave the bitmap empty.
        if bsync_mode != 'never':
            ebitmap.clear()
        ebitmap.compare(get_bitmap(bitmaps, drive0.device, 'bitmap0'))

        log('--- Cleanup ---\n')
        vm.qmp_log("block-dirty-bitmap-remove",
                   node=drive0.name, name="bitmap0")
        log(query_bitmaps(vm), indent=2)
        vm.shutdown()
        log('')

        log('--- Verification ---\n')
        # 'simulated' failures will actually all pass here because we canceled
        # while "pending". This is actually undefined behavior,
        # don't rely on this to be true!
        compare_images(bsync1, fbackup1, baseimg=fbackup0,
                       expected_match=failure != 'intermediate')
        if not failure or bsync_mode == 'always':
            # Always keep the last backup on success or when using 'always'
            base = bsync1
        else:
            base = fbackup0
        compare_images(bsync2, fbackup2, baseimg=base)
        compare_images(img_path, fbackup2)
        log('')
466
def test_backup_api():
    """
    Test malformed and prohibited invocations of the backup API.

    Sets up a source drive with a bitmap ("bitmap0") and a ready-made
    target, then issues blockdev-backup for a fixed table of sync mode /
    bitmap name / bitmap policy combinations and logs each exchange.
    """
    with iotests.FilePaths(['img', 'bsync1']) as \
         (img_path, backup_path), \
         iotests.VM() as vm:

        log("\n=== API failure tests ===\n")
        log('--- Preparing image & VM ---\n')
        source = Drive(img_path, vm=vm)
        source.img_create(iotests.imgfmt, SIZE)
        vm.add_device("{},id=scsi0".format(iotests.get_virtio_scsi_device()))
        vm.launch()

        vm.qmp_log('blockdev-add',
                   filters=[iotests.filter_qmp_testfiles],
                   node_name="drive0",
                   driver=source.fmt,
                   file={
                       'driver': 'file',
                       'filename': source.path
                   })
        source.node, source.device = 'drive0', 'device0'
        vm.qmp_log("device_add", id=source.device,
                   drive=source.name, driver="scsi-hd")
        log('')

        target = Drive(backup_path, vm=vm)
        target.create_target("backup_target", source.fmt, source.size)
        log('')

        vm.qmp_log("block-dirty-bitmap-add", node=source.name,
                   name="bitmap0", granularity=GRANULARITY)
        log('')

        log('-- Testing invalid QMP commands --\n')

        # Policies to try for each (sync mode, bitmap name) pair.
        # None means "argument omitted" (blockdev_backup strips it).
        named = ['on-success', 'always', 'never']
        named_or_unset = named + [None]
        error_cases = {
            'incremental': {
                None:        named_or_unset,
                'bitmap404': named_or_unset,
                'bitmap0':   ['always', 'never'],
            },
            'bitmap': {
                None:        named_or_unset,
                'bitmap404': named_or_unset,
                'bitmap0':   [None],
            },
            'full': {
                None:        named,
                'bitmap404': named_or_unset,
                'bitmap0':   ['never', None],
            },
            'top': {
                None:        named,
                'bitmap404': named_or_unset,
                'bitmap0':   ['never', None],
            },
            'none': {
                None:        named,
                'bitmap404': named_or_unset,
                'bitmap0':   named_or_unset,
            },
        }

        # Dicts are not stably ordered prior to 3.7, so the iteration order
        # is spelled out with tuples rather than taken from the table.
        sync_modes = ('incremental', 'bitmap', 'full', 'top', 'none')
        bitmap_names = (None, 'bitmap404', 'bitmap0')
        for sync_mode in sync_modes:
            log("-- Sync mode {:s} tests --\n".format(sync_mode))
            per_bitmap = error_cases[sync_mode]
            attempts = [(bitmap, policy)
                        for bitmap in bitmap_names
                        for policy in per_bitmap[bitmap]]
            for bitmap, policy in attempts:
                blockdev_backup(source.vm, source.name, "backup_target",
                                sync_mode, job_id='api_job',
                                bitmap=bitmap, bitmap_mode=policy)
                log('')
545
546
def main():
    """
    Run every bitmap-sync scenario, then the API failure tests.

    First "bitmap" sync mode with each bitmap sync policy, then "full" and
    "top" with the 'on-success' and 'always' policies; each combination is
    run with a simulated failure, an intermediate failure and no failure.
    """
    failures = ("simulated", "intermediate", None)

    plan = [(bsync_mode, "bitmap", failure)
            for bsync_mode in ("never", "on-success", "always")
            for failure in failures]
    plan += [(bsync_mode, sync_mode, failure)
             for sync_mode in ('full', 'top')
             for bsync_mode in ('on-success', 'always')
             for failure in failures]

    for bsync_mode, sync_mode, failure in plan:
        test_bitmap_sync(bsync_mode, sync_mode, failure)

    test_backup_api()
558
# Script entry point: hand main() to the iotests harness, declaring that
# this test supports only the qcow2 format over the file protocol.
if __name__ == '__main__':
    iotests.script_main(main, supported_fmts=['qcow2'],
                        supported_protocols=['file'])
561                        supported_protocols=['file'])
562