xref: /openbmc/qemu/scripts/device-crash-test (revision 2182e405)
1#!/usr/bin/env python3
2#
3#  Copyright (c) 2017 Red Hat Inc
4#
5# Author:
6#  Eduardo Habkost <ehabkost@redhat.com>
7#
8# This program is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License along
19# with this program; if not, write to the Free Software Foundation, Inc.,
20# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21
22"""
23Run QEMU with all combinations of -machine and -device types,
24check for crashes and unexpected errors.
25"""
26
27import os
28import sys
29import glob
30import logging
31import traceback
32import re
33import random
34import argparse
35from itertools import chain
36
37sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))
38from qemu.machine import QEMUMachine
39
40logger = logging.getLogger('device-crash-test')
41dbg = logger.debug
42
43
44# Purposes of the following rule list:
45# * Avoiding verbose log messages when we find known non-fatal
46#   (exitcode=1) errors
47# * Avoiding fatal errors when we find known crashes
48# * Skipping machines/devices that are known not to work out of
49#   the box, when running in --quick mode
50#
51# Keeping the rule list updated is desirable, but not required,
52# because unexpected cases where QEMU exits with exitcode=1 will
53# just trigger a INFO message.
54
55# Valid error rule keys:
56# * accel: regexp, full match only
57# * machine: regexp, full match only
58# * device: regexp, full match only
59# * log: regexp, partial match allowed
60# * exitcode: if not present, defaults to 1. If None, matches any exitcode
61# * warn: if True, matching failures will be logged as warnings
62# * expected: if True, QEMU is expected to always fail every time
63#   when testing the corresponding test case
64# * loglevel: log level of log output when there's a match.
65ERROR_RULE_LIST = [
66    # Machines that won't work out of the box:
67    #             MACHINE                         | ERROR MESSAGE
68    {'machine':'niagara', 'expected':True},       # Unable to load a firmware for -M niagara
69    {'machine':'boston', 'expected':True},        # Please provide either a -kernel or -bios argument
70    {'machine':'leon3_generic', 'expected':True}, # Can't read bios image (null)
71
72    # devices that don't work out of the box because they require extra options to "-device DEV":
73    #            DEVICE                                    | ERROR MESSAGE
74    {'device':'.*-(i386|x86_64)-cpu', 'expected':True},    # CPU socket-id is not set
75    {'device':'icp', 'expected':True},                     # icp_realize: required link 'xics' not found: Property '.xics' not found
76    {'device':'ics', 'expected':True},                     # ics_base_realize: required link 'xics' not found: Property '.xics' not found
77    # "-device ide-cd" does work on more recent QEMU versions, so it doesn't have expected=True
78    {'device':'ide-cd'},                                 # No drive specified
79    {'device':'ide-drive', 'expected':True},               # No drive specified
80    {'device':'ide-hd', 'expected':True},                  # No drive specified
81    {'device':'ipmi-bmc-extern', 'expected':True},         # IPMI external bmc requires chardev attribute
82    {'device':'isa-debugcon', 'expected':True},            # Can't create serial device, empty char device
83    {'device':'isa-ipmi-bt', 'expected':True},             # IPMI device requires a bmc attribute to be set
84    {'device':'isa-ipmi-kcs', 'expected':True},            # IPMI device requires a bmc attribute to be set
85    {'device':'isa-parallel', 'expected':True},            # Can't create serial device, empty char device
86    {'device':'ivshmem-doorbell', 'expected':True},        # You must specify a 'chardev'
87    {'device':'ivshmem-plain', 'expected':True},           # You must specify a 'memdev'
88    {'device':'loader', 'expected':True},                  # please include valid arguments
89    {'device':'nand', 'expected':True},                    # Unsupported NAND block size 0x1
90    {'device':'nvdimm', 'expected':True},                  # 'memdev' property is not set
91    {'device':'nvme', 'expected':True},                    # Device initialization failed
92    {'device':'pc-dimm', 'expected':True},                 # 'memdev' property is not set
93    {'device':'pci-bridge', 'expected':True},              # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
94    {'device':'pci-bridge-seat', 'expected':True},         # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
95    {'device':'pxb', 'expected':True},                     # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
96    {'device':'scsi-block', 'expected':True},              # drive property not set
97    {'device':'scsi-disk', 'expected':True},               # drive property not set
98    {'device':'scsi-generic', 'expected':True},            # drive property not set
99    {'device':'scsi-hd', 'expected':True},                 # drive property not set
100    {'device':'spapr-pci-host-bridge', 'expected':True},   # BUID not specified for PHB
101    {'device':'spapr-rng', 'expected':True},               # spapr-rng needs an RNG backend!
102    {'device':'spapr-vty', 'expected':True},               # chardev property not set
103    {'device':'tpm-tis', 'expected':True},                 # tpm_tis: backend driver with id (null) could not be found
104    {'device':'unimplemented-device', 'expected':True},    # property 'size' not specified or zero
105    {'device':'usb-braille', 'expected':True},             # Property chardev is required
106    {'device':'usb-mtp', 'expected':True},                 # rootdir property must be configured
107    {'device':'usb-redir', 'expected':True},               # Parameter 'chardev' is missing
108    {'device':'usb-serial', 'expected':True},              # Property chardev is required
109    {'device':'usb-storage', 'expected':True},             # drive property not set
110    {'device':'vfio-amd-xgbe', 'expected':True},           # -device vfio-amd-xgbe: vfio error: wrong host device name
111    {'device':'vfio-calxeda-xgmac', 'expected':True},      # -device vfio-calxeda-xgmac: vfio error: wrong host device name
112    {'device':'vfio-pci', 'expected':True},                # No provided host device
113    {'device':'vfio-pci-igd-lpc-bridge', 'expected':True}, # VFIO dummy ISA/LPC bridge must have address 1f.0
114    {'device':'vhost-scsi.*', 'expected':True},            # vhost-scsi: missing wwpn
115    {'device':'vhost-vsock-device', 'expected':True},      # guest-cid property must be greater than 2
116    {'device':'vhost-vsock-pci', 'expected':True},         # guest-cid property must be greater than 2
117    {'device':'virtio-9p-ccw', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
118    {'device':'virtio-9p-device', 'expected':True},        # 9pfs device couldn't find fsdev with the id = NULL
119    {'device':'virtio-9p-pci', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
120    {'device':'virtio-blk-ccw', 'expected':True},          # drive property not set
121    {'device':'virtio-blk-device', 'expected':True},       # drive property not set
122    {'device':'virtio-blk-device', 'expected':True},       # drive property not set
123    {'device':'virtio-blk-pci', 'expected':True},          # drive property not set
124    {'device':'virtio-crypto-ccw', 'expected':True},       # 'cryptodev' parameter expects a valid object
125    {'device':'virtio-crypto-device', 'expected':True},    # 'cryptodev' parameter expects a valid object
126    {'device':'virtio-crypto-pci', 'expected':True},       # 'cryptodev' parameter expects a valid object
127    {'device':'virtio-input-host-device', 'expected':True}, # evdev property is required
128    {'device':'virtio-input-host-pci', 'expected':True},   # evdev property is required
129    {'device':'xen-pvdevice', 'expected':True},            # Device ID invalid, it must always be supplied
130    {'device':'vhost-vsock-ccw', 'expected':True},         # guest-cid property must be greater than 2
131    {'device':'zpci', 'expected':True},                    # target must be defined
132    {'device':'pnv-(occ|icp|lpc)', 'expected':True},       # required link 'xics' not found: Property '.xics' not found
133    {'device':'powernv-cpu-.*', 'expected':True},          # pnv_core_realize: required link 'xics' not found: Property '.xics' not found
134
135    # ioapic devices are already created by pc and will fail:
136    {'machine':'q35|pc.*', 'device':'kvm-ioapic', 'expected':True}, # Only 1 ioapics allowed
137    {'machine':'q35|pc.*', 'device':'ioapic', 'expected':True},     # Only 1 ioapics allowed
138
139    # "spapr-cpu-core needs a pseries machine"
140    {'machine':'(?!pseries).*', 'device':'.*-spapr-cpu-core', 'expected':True},
141
142    # KVM-specific devices shouldn't be tried without accel=kvm:
143    {'accel':'(?!kvm).*', 'device':'kvmclock', 'expected':True},
144
145    # xen-specific machines and devices:
146    {'accel':'(?!xen).*', 'machine':'xen.*', 'expected':True},
147    {'accel':'(?!xen).*', 'device':'xen-.*', 'expected':True},
148
149    # this fails on some machine-types, but not all, so they don't have expected=True:
150    {'device':'vmgenid'}, # vmgenid requires DMA write support in fw_cfg, which this machine type does not provide
151
152    # Silence INFO messages for errors that are common on multiple
153    # devices/machines:
154    {'log':r"No '[\w-]+' bus found for device '[\w-]+'"},
155    {'log':r"images* must be given with the 'pflash' parameter"},
156    {'log':r"(Guest|ROM|Flash|Kernel) image must be specified"},
157    {'log':r"[cC]ould not load [\w ]+ (BIOS|bios) '[\w-]+\.bin'"},
158    {'log':r"Couldn't find rom image '[\w-]+\.bin'"},
159    {'log':r"speed mismatch trying to attach usb device"},
160    {'log':r"Can't create a second ISA bus"},
161    {'log':r"duplicate fw_cfg file name"},
162    # sysbus-related error messages: most machines reject most dynamic sysbus devices:
163    {'log':r"Option '-device [\w.,-]+' cannot be handled by this machine"},
164    {'log':r"Device [\w.,-]+ is not supported by this machine yet"},
165    {'log':r"Device [\w.,-]+ can not be dynamically instantiated"},
166    {'log':r"Platform Bus: Can not fit MMIO region of size "},
167    # other more specific errors we will ignore:
168    {'device':'.*-spapr-cpu-core', 'log':r"CPU core type should be"},
169    {'log':r"MSI(-X)? is not supported by interrupt controller"},
170    {'log':r"pxb-pcie? devices cannot reside on a PCIe? bus"},
171    {'log':r"Ignoring smp_cpus value"},
172    {'log':r"sd_init failed: Drive 'sd0' is already in use because it has been automatically connected to another device"},
173    {'log':r"This CPU requires a smaller page size than the system is using"},
174    {'log':r"MSI-X support is mandatory in the S390 architecture"},
175    {'log':r"rom check and register reset failed"},
176    {'log':r"Unable to initialize GIC, CPUState for CPU#0 not valid"},
177    {'log':r"Multiple VT220 operator consoles are not supported"},
178    {'log':r"core 0 already populated"},
179    {'log':r"could not find stage1 bootloader"},
180
181    # other exitcode=1 failures not listed above will just generate INFO messages:
182    {'exitcode':1, 'loglevel':logging.INFO},
183
184    # everything else (including SIGABRT and SIGSEGV) will be a fatal error:
185    {'exitcode':None, 'fatal':True, 'loglevel':logging.FATAL},
186]
187
188
189def errorRuleTestCaseMatch(rule, t):
190    """Check if a test case specification can match a error rule
191
192    This only checks if a error rule is a candidate match
193    for a given test case, it won't check if the test case
194    results/output match the rule.  See ruleListResultMatch().
195    """
196    return (('machine' not in rule or
197             'machine' not in t or
198             re.match(rule['machine'] + '$', t['machine'])) and
199            ('accel' not in rule or
200             'accel' not in t or
201             re.match(rule['accel'] + '$', t['accel'])) and
202            ('device' not in rule or
203             'device' not in t or
204             re.match(rule['device'] + '$', t['device'])))
205
206
207def ruleListCandidates(t):
208    """Generate the list of candidates that can match a test case"""
209    for i, rule in enumerate(ERROR_RULE_LIST):
210        if errorRuleTestCaseMatch(rule, t):
211            yield (i, rule)
212
213
214def findExpectedResult(t):
215    """Check if there's an expected=True error rule for a test case
216
217    Returns (i, rule) tuple, where i is the index in
218    ERROR_RULE_LIST and rule is the error rule itself.
219    """
220    for i, rule in ruleListCandidates(t):
221        if rule.get('expected'):
222            return (i, rule)
223
224
225def ruleListResultMatch(rule, r):
226    """Check if test case results/output match a error rule
227
228    It is valid to call this function only if
229    errorRuleTestCaseMatch() is True for the rule (e.g. on
230    rules returned by ruleListCandidates())
231    """
232    assert errorRuleTestCaseMatch(rule, r['testcase'])
233    return ((rule.get('exitcode', 1) is None or
234             r['exitcode'] == rule.get('exitcode', 1)) and
235            ('log' not in rule or
236             re.search(rule['log'], r['log'], re.MULTILINE)))
237
238
239def checkResultRuleList(r):
240    """Look up error rule for a given test case result
241
242    Returns (i, rule) tuple, where i is the index in
243    ERROR_RULE_LIST and rule is the error rule itself.
244    """
245    for i, rule in ruleListCandidates(r['testcase']):
246        if ruleListResultMatch(rule, r):
247            return i, rule
248
249    raise Exception("this should never happen")
250
251
252def qemuOptsEscape(s):
253    """Escape option value QemuOpts"""
254    return s.replace(",", ",,")
255
256
257def formatTestCase(t):
258    """Format test case info as "key=value key=value" for prettier logging output"""
259    return ' '.join('%s=%s' % (k, v) for k, v in t.items())
260
261
262def qomListTypeNames(vm, **kwargs):
263    """Run qom-list-types QMP command, return type names"""
264    types = vm.command('qom-list-types', **kwargs)
265    return [t['name'] for t in types]
266
267
268def infoQDM(vm):
269    """Parse 'info qdm' output"""
270    args = {'command-line': 'info qdm'}
271    devhelp = vm.command('human-monitor-command', **args)
272    for l in devhelp.split('\n'):
273        l = l.strip()
274        if l == '' or l.endswith(':'):
275            continue
276        d = {'name': re.search(r'name "([^"]+)"', l).group(1),
277             'no-user': (re.search(', no-user', l) is not None)}
278        yield d
279
280
281class QemuBinaryInfo(object):
282    def __init__(self, binary, devtype):
283        if devtype is None:
284            devtype = 'device'
285
286        self.binary = binary
287        self._machine_info = {}
288
289        dbg("devtype: %r", devtype)
290        args = ['-S', '-machine', 'none,accel=kvm:tcg']
291        dbg("querying info for QEMU binary: %s", binary)
292        vm = QEMUMachine(binary=binary, args=args)
293        vm.launch()
294        try:
295            self.alldevs = set(qomListTypeNames(vm, implements=devtype, abstract=False))
296            # there's no way to query DeviceClass::user_creatable using QMP,
297            # so use 'info qdm':
298            self.no_user_devs = set([d['name'] for d in infoQDM(vm, ) if d['no-user']])
299            self.machines = list(m['name'] for m in vm.command('query-machines'))
300            self.user_devs = self.alldevs.difference(self.no_user_devs)
301            self.kvm_available = vm.command('query-kvm')['enabled']
302        finally:
303            vm.shutdown()
304
305    def machineInfo(self, machine):
306        """Query for information on a specific machine-type
307
308        Results are cached internally, in case the same machine-
309        type is queried multiple times.
310        """
311        if machine in self._machine_info:
312            return self._machine_info[machine]
313
314        mi = {}
315        args = ['-S', '-machine', '%s' % (machine)]
316        dbg("querying machine info for binary=%s machine=%s", self.binary, machine)
317        vm = QEMUMachine(binary=self.binary, args=args)
318        try:
319            vm.launch()
320            mi['runnable'] = True
321        except KeyboardInterrupt:
322            raise
323        except:
324            dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=sys.exc_info())
325            dbg("log: %r", vm.get_log())
326            mi['runnable'] = False
327
328        vm.shutdown()
329        self._machine_info[machine] = mi
330        return mi
331
332
333BINARY_INFO = {}
334
335
336def getBinaryInfo(args, binary):
337    if binary not in BINARY_INFO:
338        BINARY_INFO[binary] = QemuBinaryInfo(binary, args.devtype)
339    return BINARY_INFO[binary]
340
341
342def checkOneCase(args, testcase):
343    """Check one specific case
344
345    Returns a dictionary containing failure information on error,
346    or None on success
347    """
348    binary = testcase['binary']
349    accel = testcase['accel']
350    machine = testcase['machine']
351    device = testcase['device']
352
353    dbg("will test: %r", testcase)
354
355    args = ['-S', '-machine', '%s,accel=%s' % (machine, accel),
356            '-device', qemuOptsEscape(device)]
357    cmdline = ' '.join([binary] + args)
358    dbg("will launch QEMU: %s", cmdline)
359    vm = QEMUMachine(binary=binary, args=args)
360
361    exc_traceback = None
362    try:
363        vm.launch()
364    except KeyboardInterrupt:
365        raise
366    except:
367        exc_traceback = traceback.format_exc()
368        dbg("Exception while running test case")
369    finally:
370        vm.shutdown()
371        ec = vm.exitcode()
372        log = vm.get_log()
373
374    if exc_traceback is not None or ec != 0:
375        return {'exc_traceback':exc_traceback,
376                'exitcode':ec,
377                'log':log,
378                'testcase':testcase,
379                'cmdline':cmdline}
380
381
382def binariesToTest(args, testcase):
383    if args.qemu:
384        r = args.qemu
385    else:
386        r = [f.path for f in os.scandir('.')
387             if f.name.startswith('qemu-system-') and
388                f.is_file() and os.access(f, os.X_OK)]
389    return r
390
391
392def accelsToTest(args, testcase):
393    if getBinaryInfo(args, testcase['binary']).kvm_available:
394        yield 'kvm'
395    yield 'tcg'
396
397
398def machinesToTest(args, testcase):
399    return getBinaryInfo(args, testcase['binary']).machines
400
401
402def devicesToTest(args, testcase):
403    return getBinaryInfo(args, testcase['binary']).user_devs
404
405
406TESTCASE_VARIABLES = [
407    ('binary', binariesToTest),
408    ('accel', accelsToTest),
409    ('machine', machinesToTest),
410    ('device', devicesToTest),
411]
412
413
414def genCases1(args, testcases, var, fn):
415    """Generate new testcases for one variable
416
417    If an existing item already has a variable set, don't
418    generate new items and just return it directly. This
419    allows the "-t" command-line option to be used to choose
420    a specific test case.
421    """
422    for testcase in testcases:
423        if var in testcase:
424            yield testcase.copy()
425        else:
426            for i in fn(args, testcase):
427                t = testcase.copy()
428                t[var] = i
429                yield t
430
431
432def genCases(args, testcase):
433    """Generate test cases for all variables
434    """
435    cases = [testcase.copy()]
436    for var, fn in TESTCASE_VARIABLES:
437        dbg("var: %r, fn: %r", var, fn)
438        cases = genCases1(args, cases, var, fn)
439    return cases
440
441
442def casesToTest(args, testcase):
443    cases = genCases(args, testcase)
444    if args.random:
445        cases = list(cases)
446        cases = random.sample(cases, min(args.random, len(cases)))
447    if args.debug:
448        cases = list(cases)
449        dbg("%d test cases to test", len(cases))
450    if args.shuffle:
451        cases = list(cases)
452        random.shuffle(cases)
453    return cases
454
455
456def logFailure(f, level):
457    t = f['testcase']
458    logger.log(level, "failed: %s", formatTestCase(t))
459    logger.log(level, "cmdline: %s", f['cmdline'])
460    for l in f['log'].strip().split('\n'):
461        logger.log(level, "log: %s", l)
462    logger.log(level, "exit code: %r", f['exitcode'])
463    if f['exc_traceback']:
464        logger.log(level, "exception:")
465        for l in f['exc_traceback'].split('\n'):
466            logger.log(level, "  %s", l.rstrip('\n'))
467
468
469def main():
470    parser = argparse.ArgumentParser(description="QEMU -device crash test")
471    parser.add_argument('-t', metavar='KEY=VALUE', nargs='*',
472                        help="Limit test cases to KEY=VALUE",
473                        action='append', dest='testcases', default=[])
474    parser.add_argument('-d', '--debug', action='store_true',
475                        help='debug output')
476    parser.add_argument('-v', '--verbose', action='store_true', default=True,
477                        help='verbose output')
478    parser.add_argument('-q', '--quiet', dest='verbose', action='store_false',
479                        help='non-verbose output')
480    parser.add_argument('-r', '--random', type=int, metavar='COUNT',
481                        help='run a random sample of COUNT test cases',
482                        default=0)
483    parser.add_argument('--shuffle', action='store_true',
484                        help='Run test cases in random order')
485    parser.add_argument('--dry-run', action='store_true',
486                        help="Don't run any tests, just generate list")
487    parser.add_argument('-D', '--devtype', metavar='TYPE',
488                        help="Test only device types that implement TYPE")
489    parser.add_argument('-Q', '--quick', action='store_true', default=True,
490                        help="Quick mode: skip test cases that are expected to fail")
491    parser.add_argument('-F', '--full', action='store_false', dest='quick',
492                        help="Full mode: test cases that are expected to fail")
493    parser.add_argument('--strict', action='store_true', dest='strict',
494                        help="Treat all warnings as fatal")
495    parser.add_argument('qemu', nargs='*', metavar='QEMU',
496                        help='QEMU binary to run')
497    args = parser.parse_args()
498
499    if args.debug:
500        lvl = logging.DEBUG
501    elif args.verbose:
502        lvl = logging.INFO
503    else:
504        lvl = logging.WARN
505    logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s')
506
507    fatal_failures = []
508    wl_stats = {}
509    skipped = 0
510    total = 0
511
512    tc = {}
513    dbg("testcases: %r", args.testcases)
514    if args.testcases:
515        for t in chain(*args.testcases):
516            for kv in t.split():
517                k, v = kv.split('=', 1)
518                tc[k] = v
519
520    if len(binariesToTest(args, tc)) == 0:
521        print("No QEMU binary found", file=sys.stderr)
522        parser.print_usage(sys.stderr)
523        return 1
524
525    for t in casesToTest(args, tc):
526        logger.info("running test case: %s", formatTestCase(t))
527        total += 1
528
529        expected_match = findExpectedResult(t)
530        if (args.quick and
531                (expected_match or
532                 not getBinaryInfo(args, t['binary']).machineInfo(t['machine'])['runnable'])):
533            dbg("skipped: %s", formatTestCase(t))
534            skipped += 1
535            continue
536
537        if args.dry_run:
538            continue
539
540        try:
541            f = checkOneCase(args, t)
542        except KeyboardInterrupt:
543            break
544
545        if f:
546            i, rule = checkResultRuleList(f)
547            dbg("testcase: %r, rule list match: %r", t, rule)
548            wl_stats.setdefault(i, []).append(f)
549            level = rule.get('loglevel', logging.DEBUG)
550            logFailure(f, level)
551            if rule.get('fatal') or (args.strict and level >= logging.WARN):
552                fatal_failures.append(f)
553        else:
554            dbg("success: %s", formatTestCase(t))
555            if expected_match:
556                logger.warn("Didn't fail as expected: %s", formatTestCase(t))
557
558    logger.info("Total: %d test cases", total)
559    if skipped:
560        logger.info("Skipped %d test cases", skipped)
561
562    if args.debug:
563        stats = sorted([(len(wl_stats.get(i, [])), rule) for i, rule in
564                         enumerate(ERROR_RULE_LIST)], key=lambda x: x[0])
565        for count, rule in stats:
566            dbg("error rule stats: %d: %r", count, rule)
567
568    if fatal_failures:
569        for f in fatal_failures:
570            t = f['testcase']
571            logger.error("Fatal failure: %s", formatTestCase(t))
572        logger.error("Fatal failures on some machine/device combinations")
573        return 1
574
575if __name__ == '__main__':
576    sys.exit(main())
577