xref: /openbmc/qemu/scripts/device-crash-test (revision 5e437d3c)
1#!/usr/bin/env python3
2#
3#  Copyright (c) 2017 Red Hat Inc
4#
5# Author:
6#  Eduardo Habkost <ehabkost@redhat.com>
7#
8# This program is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License along
19# with this program; if not, write to the Free Software Foundation, Inc.,
20# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21
22"""
23Run QEMU with all combinations of -machine and -device types,
24check for crashes and unexpected errors.
25"""
26
27import os
28import sys
29import glob
30import logging
31import traceback
32import re
33import random
34import argparse
35from itertools import chain
36
37sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))
38from qemu.machine import QEMUMachine
39
# Module-wide logger; its level and output format are configured in main()
logger = logging.getLogger('device-crash-test')
# Shorthand used throughout this script for debug-level messages
dbg = logger.debug
42
43
44# Purposes of the following rule list:
45# * Avoiding verbose log messages when we find known non-fatal
46#   (exitcode=1) errors
47# * Avoiding fatal errors when we find known crashes
48# * Skipping machines/devices that are known not to work out of
49#   the box, when running in --quick mode
50#
51# Keeping the rule list updated is desirable, but not required,
52# because unexpected cases where QEMU exits with exitcode=1 will
# just trigger an INFO message.
54
# Valid error rule keys:
# * accel: regexp, full match only
# * machine: regexp, full match only
# * device: regexp, full match only
# * log: regexp, partial match allowed
# * exitcode: if not present, defaults to 1. If None, matches any exitcode
# * warn: if True, matching failures will be logged as warnings
#   NOTE(review): no code in this script appears to read 'warn'; use
#   'loglevel' to control the log level of a match.
# * expected: if True, QEMU is expected to always fail every time
#   when testing the corresponding test case
# * loglevel: log level of log output when there's a match.
# * fatal: if True, matching failures are reported as fatal failures
#   and make the script exit with a non-zero status
#
# Rules are tried in list order and the first rule matching both the
# test case and its result wins, so the catch-all rules must stay last.
ERROR_RULE_LIST = [
    # Machines that won't work out of the box:
    #             MACHINE                         | ERROR MESSAGE
    {'machine':'niagara', 'expected':True},       # Unable to load a firmware for -M niagara
    {'machine':'boston', 'expected':True},        # Please provide either a -kernel or -bios argument
    {'machine':'leon3_generic', 'expected':True}, # Can't read bios image (null)

    # devices that don't work out of the box because they require extra options to "-device DEV":
    #            DEVICE                                    | ERROR MESSAGE
    {'device':'.*-(i386|x86_64)-cpu', 'expected':True},    # CPU socket-id is not set
    {'device':'icp', 'expected':True},                     # icp_realize: required link 'xics' not found: Property '.xics' not found
    {'device':'ics', 'expected':True},                     # ics_base_realize: required link 'xics' not found: Property '.xics' not found
    # "-device ide-cd" does work on more recent QEMU versions, so it doesn't have expected=True
    {'device':'ide-cd'},                                 # No drive specified
    {'device':'ide-hd', 'expected':True},                  # No drive specified
    {'device':'ipmi-bmc-extern', 'expected':True},         # IPMI external bmc requires chardev attribute
    {'device':'isa-debugcon', 'expected':True},            # Can't create serial device, empty char device
    {'device':'isa-ipmi-bt', 'expected':True},             # IPMI device requires a bmc attribute to be set
    {'device':'isa-ipmi-kcs', 'expected':True},            # IPMI device requires a bmc attribute to be set
    {'device':'isa-parallel', 'expected':True},            # Can't create serial device, empty char device
    {'device':'ivshmem-doorbell', 'expected':True},        # You must specify a 'chardev'
    {'device':'ivshmem-plain', 'expected':True},           # You must specify a 'memdev'
    {'device':'loader', 'expected':True},                  # please include valid arguments
    {'device':'nand', 'expected':True},                    # Unsupported NAND block size 0x1
    {'device':'nvdimm', 'expected':True},                  # 'memdev' property is not set
    {'device':'nvme', 'expected':True},                    # Device initialization failed
    {'device':'pc-dimm', 'expected':True},                 # 'memdev' property is not set
    {'device':'pci-bridge', 'expected':True},              # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device':'pci-bridge-seat', 'expected':True},         # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device':'pxb', 'expected':True},                     # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device':'scsi-block', 'expected':True},              # drive property not set
    {'device':'scsi-generic', 'expected':True},            # drive property not set
    {'device':'scsi-hd', 'expected':True},                 # drive property not set
    {'device':'spapr-pci-host-bridge', 'expected':True},   # BUID not specified for PHB
    {'device':'spapr-rng', 'expected':True},               # spapr-rng needs an RNG backend!
    {'device':'spapr-vty', 'expected':True},               # chardev property not set
    {'device':'tpm-tis', 'expected':True},                 # tpm_tis: backend driver with id (null) could not be found
    {'device':'unimplemented-device', 'expected':True},    # property 'size' not specified or zero
    {'device':'usb-braille', 'expected':True},             # Property chardev is required
    {'device':'usb-mtp', 'expected':True},                 # rootdir property must be configured
    {'device':'usb-redir', 'expected':True},               # Parameter 'chardev' is missing
    {'device':'usb-serial', 'expected':True},              # Property chardev is required
    {'device':'usb-storage', 'expected':True},             # drive property not set
    {'device':'vfio-amd-xgbe', 'expected':True},           # -device vfio-amd-xgbe: vfio error: wrong host device name
    {'device':'vfio-calxeda-xgmac', 'expected':True},      # -device vfio-calxeda-xgmac: vfio error: wrong host device name
    {'device':'vfio-pci', 'expected':True},                # No provided host device
    {'device':'vfio-pci-igd-lpc-bridge', 'expected':True}, # VFIO dummy ISA/LPC bridge must have address 1f.0
    {'device':'vhost-scsi.*', 'expected':True},            # vhost-scsi: missing wwpn
    {'device':'vhost-vsock-device', 'expected':True},      # guest-cid property must be greater than 2
    {'device':'vhost-vsock-pci', 'expected':True},         # guest-cid property must be greater than 2
    {'device':'virtio-9p-ccw', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
    {'device':'virtio-9p-device', 'expected':True},        # 9pfs device couldn't find fsdev with the id = NULL
    {'device':'virtio-9p-pci', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
    {'device':'virtio-blk-ccw', 'expected':True},          # drive property not set
    {'device':'virtio-blk-device', 'expected':True},       # drive property not set
    {'device':'virtio-blk-pci', 'expected':True},          # drive property not set
    {'device':'virtio-crypto-ccw', 'expected':True},       # 'cryptodev' parameter expects a valid object
    {'device':'virtio-crypto-device', 'expected':True},    # 'cryptodev' parameter expects a valid object
    {'device':'virtio-crypto-pci', 'expected':True},       # 'cryptodev' parameter expects a valid object
    {'device':'virtio-input-host-device', 'expected':True}, # evdev property is required
    {'device':'virtio-input-host-pci', 'expected':True},   # evdev property is required
    {'device':'xen-pvdevice', 'expected':True},            # Device ID invalid, it must always be supplied
    {'device':'vhost-vsock-ccw', 'expected':True},         # guest-cid property must be greater than 2
    {'device':'zpci', 'expected':True},                    # target must be defined
    {'device':'pnv-(occ|icp|lpc)', 'expected':True},       # required link 'xics' not found: Property '.xics' not found
    {'device':'powernv-cpu-.*', 'expected':True},          # pnv_core_realize: required link 'xics' not found: Property '.xics' not found

    # ioapic devices are already created by pc and will fail:
    {'machine':'q35|pc.*', 'device':'kvm-ioapic', 'expected':True}, # Only 1 ioapics allowed
    {'machine':'q35|pc.*', 'device':'ioapic', 'expected':True},     # Only 1 ioapics allowed

    # "spapr-cpu-core needs a pseries machine"
    {'machine':'(?!pseries).*', 'device':'.*-spapr-cpu-core', 'expected':True},

    # KVM-specific devices shouldn't be tried without accel=kvm:
    {'accel':'(?!kvm).*', 'device':'kvmclock', 'expected':True},

    # xen-specific machines and devices:
    {'accel':'(?!xen).*', 'machine':'xen.*', 'expected':True},
    {'accel':'(?!xen).*', 'device':'xen-.*', 'expected':True},

    # this fails on some machine-types, but not all, so they don't have expected=True:
    {'device':'vmgenid'}, # vmgenid requires DMA write support in fw_cfg, which this machine type does not provide

    # Silence INFO messages for errors that are common on multiple
    # devices/machines:
    {'log':r"No '[\w-]+' bus found for device '[\w-]+'"},
    {'log':r"images* must be given with the 'pflash' parameter"},
    {'log':r"(Guest|ROM|Flash|Kernel) image must be specified"},
    {'log':r"[cC]ould not load [\w ]+ (BIOS|bios) '[\w-]+\.bin'"},
    {'log':r"Couldn't find rom image '[\w-]+\.bin'"},
    {'log':r"speed mismatch trying to attach usb device"},
    {'log':r"Can't create a second ISA bus"},
    {'log':r"duplicate fw_cfg file name"},
    # sysbus-related error messages: most machines reject most dynamic sysbus devices:
    {'log':r"Option '-device [\w.,-]+' cannot be handled by this machine"},
    {'log':r"Device [\w.,-]+ is not supported by this machine yet"},
    {'log':r"Device [\w.,-]+ can not be dynamically instantiated"},
    {'log':r"Platform Bus: Can not fit MMIO region of size "},
    # other more specific errors we will ignore:
    {'device':'.*-spapr-cpu-core', 'log':r"CPU core type should be"},
    {'log':r"MSI(-X)? is not supported by interrupt controller"},
    {'log':r"pxb-pcie? devices cannot reside on a PCIe? bus"},
    {'log':r"Ignoring smp_cpus value"},
    {'log':r"sd_init failed: Drive 'sd0' is already in use because it has been automatically connected to another device"},
    {'log':r"This CPU requires a smaller page size than the system is using"},
    {'log':r"MSI-X support is mandatory in the S390 architecture"},
    {'log':r"rom check and register reset failed"},
    {'log':r"Unable to initialize GIC, CPUState for CPU#0 not valid"},
    {'log':r"Multiple VT220 operator consoles are not supported"},
    {'log':r"core 0 already populated"},
    {'log':r"could not find stage1 bootloader"},

    # other exitcode=1 failures not listed above will just generate INFO messages:
    {'exitcode':1, 'loglevel':logging.INFO},

    # everything else (including SIGABRT and SIGSEGV) will be a fatal error:
    {'exitcode':None, 'fatal':True, 'loglevel':logging.FATAL},
]
185
186
def errorRuleTestCaseMatch(rule, t):
    """Check if a test case specification can match an error rule

    This only checks if an error rule is a candidate match
    for a given test case, it won't check if the test case
    results/output match the rule.  See ruleListResultMatch().

    The 'machine', 'accel' and 'device' keys are compared only when
    present in both the rule and the test case; a key missing on
    either side does not prevent a match.  The rule value is a regexp
    that must match the whole test case value.

    Returns True if the rule is a candidate for the test case,
    False otherwise.
    """
    for key in ('machine', 'accel', 'device'):
        # re.fullmatch() anchors the complete pattern.  Appending '$' to
        # the pattern would only anchor the last alternative of rules
        # such as 'q35|pc.*', and '$' also matches before a trailing
        # newline.
        if key in rule and key in t and not re.fullmatch(rule[key], t[key]):
            return False
    return True
203
204
def ruleListCandidates(t):
    """Yield (index, rule) for every ERROR_RULE_LIST entry that may apply to test case t"""
    yield from ((position, candidate)
                for position, candidate in enumerate(ERROR_RULE_LIST)
                if errorRuleTestCaseMatch(candidate, t))
210
211
def findExpectedResult(t):
    """Look for the first expected=True error rule that applies to a test case

    Returns an (i, rule) tuple, where i is the position of the rule
    in ERROR_RULE_LIST, or None when no candidate rule is marked as
    expected.
    """
    expected_rules = (pair for pair in ruleListCandidates(t)
                      if pair[1].get('expected'))
    return next(expected_rules, None)
221
222
def ruleListResultMatch(rule, r):
    """Tell whether the results/output of a test case satisfy an error rule

    Only call this for rules for which errorRuleTestCaseMatch() is
    True (e.g. the rules produced by ruleListCandidates()).

    The result is truthy when the exit code matches the rule (a rule
    exitcode of None accepts any exit code, a missing one means 1) and
    the rule's 'log' regexp, if any, is found in the QEMU log.
    """
    assert errorRuleTestCaseMatch(rule, r['testcase'])

    wanted_exitcode = rule.get('exitcode', 1)
    if wanted_exitcode is not None and r['exitcode'] != wanted_exitcode:
        return False

    if 'log' not in rule:
        return True
    return re.search(rule['log'], r['log'], re.MULTILINE)
235
236
def checkResultRuleList(r):
    """Find the first error rule matching a test case result

    Returns an (i, rule) tuple, where i is the position of the rule
    in ERROR_RULE_LIST.  Raises Exception if no rule matches.
    """
    matching = (pair for pair in ruleListCandidates(r['testcase'])
                if ruleListResultMatch(pair[1], r))
    found = next(matching, None)
    if found is None:
        raise Exception("this should never happen")
    return found
248
249
def qemuOptsEscape(s):
    """Double every comma in s so it can be used as a QemuOpts option value"""
    pieces = s.split(",")
    return ",,".join(pieces)
253
254
def formatTestCase(t):
    """Render a test case dict as space-separated "key=value" pairs for logging"""
    pairs = []
    for key, value in t.items():
        pairs.append('%s=%s' % (key, value))
    return ' '.join(pairs)
258
259
def qomListTypeNames(vm, **kwargs):
    """Return the 'name' of each type reported by the qom-list-types QMP command

    Keyword arguments are passed through as arguments of the QMP command.
    """
    names = []
    for type_info in vm.command('qom-list-types', **kwargs):
        names.append(type_info['name'])
    return names
264
265
def infoQDM(vm):
    """Run 'info qdm' through the human monitor and parse its output

    Yields one dict per device line, with the device 'name' and a
    'no-user' flag telling whether the line contains ", no-user".
    Blank lines and lines ending in ':' are skipped.
    """
    monitor_output = vm.command('human-monitor-command',
                                **{'command-line': 'info qdm'})
    for raw_line in monitor_output.split('\n'):
        entry = raw_line.strip()
        if not entry or entry.endswith(':'):
            continue
        device_name = re.search(r'name "([^"]+)"', entry).group(1)
        hidden = re.search(', no-user', entry) is not None
        yield {'name': device_name, 'no-user': hidden}
277
278
class QemuBinaryInfo(object):
    """Information queried from one QEMU binary

    Attributes set by the constructor:
    * binary: the binary the information was queried from
    * alldevs: set of non-abstract type names implementing devtype
    * no_user_devs: set of device names flagged no-user by 'info qdm'
    * user_devs: alldevs minus no_user_devs
    * machines: list of machine-type names
    * kvm_available: the 'enabled' field of the query-kvm reply
    """
    def __init__(self, binary, devtype):
        """Launch binary with -machine none and query it through QMP

        devtype is the type that tested devices must implement;
        None means 'device'.
        """
        if devtype is None:
            devtype = 'device'

        self.binary = binary
        self._machine_info = {}

        dbg("devtype: %r", devtype)
        args = ['-S', '-machine', 'none,accel=kvm:tcg']
        dbg("querying info for QEMU binary: %s", binary)
        vm = QEMUMachine(binary=binary, args=args)
        try:
            # launch() is inside the try block so that shutdown() also runs
            # when launching fails, as the other launch sites in this
            # script do.
            vm.launch()
            self.alldevs = set(qomListTypeNames(vm, implements=devtype, abstract=False))
            # there's no way to query DeviceClass::user_creatable using QMP,
            # so use 'info qdm':
            self.no_user_devs = {d['name'] for d in infoQDM(vm) if d['no-user']}
            self.machines = [m['name'] for m in vm.command('query-machines')]
            self.user_devs = self.alldevs.difference(self.no_user_devs)
            self.kvm_available = vm.command('query-kvm')['enabled']
        finally:
            vm.shutdown()

    def machineInfo(self, machine):
        """Query for information on a specific machine-type

        Results are cached internally, in case the same machine-
        type is queried multiple times.

        Returns a dict whose 'runnable' key tells whether QEMU could
        be launched with "-machine <machine>".
        """
        if machine in self._machine_info:
            return self._machine_info[machine]

        mi = {}
        args = ['-S', '-machine', '%s' % (machine)]
        dbg("querying machine info for binary=%s machine=%s", self.binary, machine)
        vm = QEMUMachine(binary=self.binary, args=args)
        try:
            vm.launch()
            mi['runnable'] = True
        except Exception:
            # KeyboardInterrupt and SystemExit are not Exception subclasses,
            # so they propagate instead of marking the machine non-runnable.
            dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=sys.exc_info())
            dbg("log: %r", vm.get_log())
            mi['runnable'] = False
        finally:
            # Always shut the VM down, including when interrupted
            vm.shutdown()

        self._machine_info[machine] = mi
        return mi
329
330
# Cache of QemuBinaryInfo objects, keyed by binary path
BINARY_INFO = {}


def getBinaryInfo(args, binary):
    """Return the QemuBinaryInfo for binary, creating and caching it on first use"""
    info = BINARY_INFO.get(binary)
    if info is None:
        info = QemuBinaryInfo(binary, args.devtype)
        BINARY_INFO[binary] = info
    return info
338
339
def checkOneCase(args, testcase):
    """Check one specific case

    Launches QEMU with the binary, accelerator, machine and device
    given by testcase, then shuts it down and collects its exit code
    and log.

    args holds the parsed command-line options; it is not used here.

    Returns a dictionary containing failure information on error,
    or None on success
    """
    binary = testcase['binary']
    accel = testcase['accel']
    machine = testcase['machine']
    device = testcase['device']

    dbg("will test: %r", testcase)

    # Named qemu_args so that the "args" parameter is not shadowed
    qemu_args = ['-S', '-machine', '%s,accel=%s' % (machine, accel),
                 '-device', qemuOptsEscape(device)]
    cmdline = ' '.join([binary] + qemu_args)
    dbg("will launch QEMU: %s", cmdline)
    vm = QEMUMachine(binary=binary, args=qemu_args)

    exc_traceback = None
    try:
        vm.launch()
    except Exception:
        # KeyboardInterrupt and SystemExit are not Exception subclasses,
        # so they propagate to the caller instead of being recorded as a
        # test failure.
        exc_traceback = traceback.format_exc()
        dbg("Exception while running test case")
    finally:
        vm.shutdown()
        ec = vm.exitcode()
        log = vm.get_log()

    if exc_traceback is not None or ec != 0:
        return {'exc_traceback':exc_traceback,
                'exitcode':ec,
                'log':log,
                'testcase':testcase,
                'cmdline':cmdline}
    return None
378
379
def binariesToTest(args, testcase):
    """Return the list of QEMU binaries to test

    Binaries named on the command line take precedence; otherwise the
    current directory is scanned for executable regular files whose
    name starts with "qemu-system-".
    """
    if args.qemu:
        return args.qemu

    found = []
    for entry in os.scandir('.'):
        if not entry.name.startswith('qemu-system-'):
            continue
        if entry.is_file() and os.access(entry, os.X_OK):
            found.append(entry.path)
    return found
388
389
def accelsToTest(args, testcase):
    """Yield the accelerators to test: 'kvm' if the binary reports it available, then 'tcg'"""
    binary_info = getBinaryInfo(args, testcase['binary'])
    accels = ['kvm', 'tcg'] if binary_info.kvm_available else ['tcg']
    yield from accels
394
395
def machinesToTest(args, testcase):
    """Return the machine-type names known for the test case's binary"""
    binary_info = getBinaryInfo(args, testcase['binary'])
    return binary_info.machines
398
399
def devicesToTest(args, testcase):
    """Return the user-creatable device names known for the test case's binary"""
    binary_info = getBinaryInfo(args, testcase['binary'])
    return binary_info.user_devs
402
403
# Test case variables, in the order genCases() expands them.  Each entry is
# (variable name, function); the function is called as fn(args, testcase)
# and provides the values to try for that variable.  The order matters:
# the accel, machine and device functions read testcase['binary'].
TESTCASE_VARIABLES = [
    ('binary', binariesToTest),
    ('accel', accelsToTest),
    ('machine', machinesToTest),
    ('device', devicesToTest),
]
410
411
def genCases1(args, testcases, var, fn):
    """Expand one variable over a sequence of test cases

    A test case that already defines var is passed through as a copy,
    without generating new items; this is what lets the "-t"
    command-line option pin a specific test case.  Any other test case
    is turned into one copy per value returned by fn(args, testcase),
    with var set to that value.
    """
    for base in testcases:
        if var not in base:
            for value in fn(args, base):
                yield {**base, var: value}
            continue
        yield base.copy()
427                yield t
428
429
def genCases(args, testcase):
    """Expand every entry of TESTCASE_VARIABLES, in order, starting from testcase

    Returns an iterable of test case dicts.
    """
    expanded = [testcase.copy()]
    for variable, values_fn in TESTCASE_VARIABLES:
        dbg("var: %r, fn: %r", variable, values_fn)
        expanded = genCases1(args, expanded, variable, values_fn)
    return expanded
437    return cases
438
439
def casesToTest(args, testcase):
    """Return the test cases to run, honouring --random, --debug and --shuffle

    The cases stay a lazy iterable unless one of those options
    requires the complete list.
    """
    cases = genCases(args, testcase)
    if not (args.random or args.debug or args.shuffle):
        return cases

    cases = list(cases)
    if args.random:
        sample_size = min(args.random, len(cases))
        cases = random.sample(cases, sample_size)
    if args.debug:
        dbg("%d test cases to test", len(cases))
    if args.shuffle:
        random.shuffle(cases)
    return cases
451    return cases
452
453
def logFailure(f, level):
    """Log a failure dict, as returned by checkOneCase(), at the given log level"""
    messages = [("failed: %s", formatTestCase(f['testcase'])),
                ("cmdline: %s", f['cmdline'])]
    messages.extend(("log: %s", line)
                    for line in f['log'].strip().split('\n'))
    messages.append(("exit code: %r", f['exitcode']))
    for fmt, value in messages:
        logger.log(level, fmt, value)

    tb_text = f['exc_traceback']
    if tb_text:
        logger.log(level, "exception:")
        for line in tb_text.split('\n'):
            logger.log(level, "  %s", line.rstrip('\n'))
464            logger.log(level, "  %s", l.rstrip('\n'))
465
466
def main():
    """Parse the command line, run the selected test cases and report failures

    Returns 1 if no QEMU binary was found or if there was any fatal
    failure, None otherwise.
    """
    parser = argparse.ArgumentParser(description="QEMU -device crash test")
    parser.add_argument('-t', metavar='KEY=VALUE', nargs='*',
                        help="Limit test cases to KEY=VALUE",
                        action='append', dest='testcases', default=[])
    parser.add_argument('-d', '--debug', action='store_true',
                        help='debug output')
    parser.add_argument('-v', '--verbose', action='store_true', default=True,
                        help='verbose output')
    parser.add_argument('-q', '--quiet', dest='verbose', action='store_false',
                        help='non-verbose output')
    parser.add_argument('-r', '--random', type=int, metavar='COUNT',
                        help='run a random sample of COUNT test cases',
                        default=0)
    parser.add_argument('--shuffle', action='store_true',
                        help='Run test cases in random order')
    parser.add_argument('--dry-run', action='store_true',
                        help="Don't run any tests, just generate list")
    parser.add_argument('-D', '--devtype', metavar='TYPE',
                        help="Test only device types that implement TYPE")
    parser.add_argument('-Q', '--quick', action='store_true', default=True,
                        help="Quick mode: skip test cases that are expected to fail")
    parser.add_argument('-F', '--full', action='store_false', dest='quick',
                        help="Full mode: test cases that are expected to fail")
    parser.add_argument('--strict', action='store_true', dest='strict',
                        help="Treat all warnings as fatal")
    parser.add_argument('qemu', nargs='*', metavar='QEMU',
                        help='QEMU binary to run')
    args = parser.parse_args()

    if args.debug:
        lvl = logging.DEBUG
    elif args.verbose:
        lvl = logging.INFO
    else:
        lvl = logging.WARNING
    logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s')

    fatal_failures = []
    # failures grouped by the index of the matching rule in ERROR_RULE_LIST
    wl_stats = {}
    skipped = 0
    total = 0

    # Variables fixed by "-t KEY=VALUE"; genCases1() won't expand them
    tc = {}
    dbg("testcases: %r", args.testcases)
    if args.testcases:
        for t in chain(*args.testcases):
            for kv in t.split():
                if '=' not in kv:
                    # report a usage error instead of an unpacking traceback
                    parser.error("invalid test case specification %r: "
                                 "expected KEY=VALUE" % (kv,))
                k, v = kv.split('=', 1)
                tc[k] = v

    if not binariesToTest(args, tc):
        print("No QEMU binary found", file=sys.stderr)
        parser.print_usage(sys.stderr)
        return 1

    for t in casesToTest(args, tc):
        logger.info("running test case: %s", formatTestCase(t))
        total += 1

        expected_match = findExpectedResult(t)
        # Quick mode skips cases expected to fail and machines that can't
        # even be launched without a device.
        if (args.quick and
                (expected_match or
                 not getBinaryInfo(args, t['binary']).machineInfo(t['machine'])['runnable'])):
            dbg("skipped: %s", formatTestCase(t))
            skipped += 1
            continue

        if args.dry_run:
            continue

        try:
            f = checkOneCase(args, t)
        except KeyboardInterrupt:
            # stop testing, but still report what was collected so far
            break

        if f:
            i, rule = checkResultRuleList(f)
            dbg("testcase: %r, rule list match: %r", t, rule)
            wl_stats.setdefault(i, []).append(f)
            level = rule.get('loglevel', logging.DEBUG)
            logFailure(f, level)
            if rule.get('fatal') or (args.strict and level >= logging.WARNING):
                fatal_failures.append(f)
        else:
            dbg("success: %s", formatTestCase(t))
            if expected_match:
                # Logger.warn() is a deprecated alias that was removed in
                # Python 3.13; warning() is the supported spelling.
                logger.warning("Didn't fail as expected: %s", formatTestCase(t))

    logger.info("Total: %d test cases", total)
    if skipped:
        logger.info("Skipped %d test cases", skipped)

    if args.debug:
        stats = sorted([(len(wl_stats.get(i, [])), rule) for i, rule in
                         enumerate(ERROR_RULE_LIST)], key=lambda x: x[0])
        for count, rule in stats:
            dbg("error rule stats: %d: %r", count, rule)

    if fatal_failures:
        for f in fatal_failures:
            t = f['testcase']
            logger.error("Fatal failure: %s", formatTestCase(t))
        logger.error("Fatal failures on some machine/device combinations")
        return 1
572
# Script entry point: the return value of main() becomes the exit status
# (main() returns 1 on failure and None otherwise)
if __name__ == '__main__':
    sys.exit(main())
575