xref: /openbmc/qemu/scripts/device-crash-test (revision 55e0a3463528f0588e4b8813baddc4179777b3e3)
1#!/usr/bin/env python
2#
3#  Copyright (c) 2017 Red Hat Inc
4#
5# Author:
6#  Eduardo Habkost <ehabkost@redhat.com>
7#
8# This program is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License along
19# with this program; if not, write to the Free Software Foundation, Inc.,
20# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21
22"""
23Run QEMU with all combinations of -machine and -device types,
24check for crashes and unexpected errors.
25"""
26from __future__ import print_function
27
28import sys
29import glob
30import logging
31import traceback
32import re
33import random
34import argparse
35from itertools import chain
36
37from qemu import QEMUMachine
38
# Module-wide logger; its level and output format are configured by
# logging.basicConfig() in main().
logger = logging.getLogger('device-crash-test')
# Shorthand used throughout the script for debug-level messages
dbg = logger.debug
41
42
# Purposes of the following whitelist:
# * Avoiding verbose log messages when we find known non-fatal
#   (exitcode=1) errors
# * Avoiding fatal errors when we find known crashes
# * Skipping machines/devices that are known not to work out of
#   the box, when running in --quick mode
#
# Keeping the whitelist updated is desirable, but not required,
# because unexpected cases where QEMU exits with exitcode=1 will
# just trigger an INFO message.
#
# Entries are tried in list order and the first entry matching a
# failure is used, so the two catch-all entries must stay last.

# Valid whitelist entry keys:
# * accel: regexp, full match only
# * machine: regexp, full match only
# * device: regexp, full match only
# * log: regexp, partial match allowed
# * exitcode: if not present, defaults to 1. If None, matches any exitcode
# * warn: if True, matching failures will be logged as warnings
# * expected: if True, QEMU is expected to always fail every time
#   when testing the corresponding test case
# * loglevel: log level of log output when there's a match.
# * fatal: if True, matching failures are reported as fatal failures
#   and make the script exit with a non-zero status
ERROR_WHITELIST = [
    # Machines that won't work out of the box:
    #             MACHINE                           | ERROR MESSAGE
    {'machine': 'niagara', 'expected': True},       # Unable to load a firmware for -M niagara
    {'machine': 'boston', 'expected': True},        # Please provide either a -kernel or -bios argument
    {'machine': 'leon3_generic', 'expected': True}, # Can't read bios image (null)

    # devices that don't work out of the box because they require extra options to "-device DEV":
    #            DEVICE                                      | ERROR MESSAGE
    {'device': '.*-(i386|x86_64)-cpu', 'expected': True},    # CPU socket-id is not set
    {'device': 'icp', 'expected': True},                     # icp_realize: required link 'xics' not found: Property '.xics' not found
    {'device': 'ics', 'expected': True},                     # ics_base_realize: required link 'xics' not found: Property '.xics' not found
    # "-device ide-cd" does work on more recent QEMU versions, so it doesn't have expected=True
    {'device': 'ide-cd'},                                    # No drive specified
    {'device': 'ide-drive', 'expected': True},               # No drive specified
    {'device': 'ide-hd', 'expected': True},                  # No drive specified
    {'device': 'ipmi-bmc-extern', 'expected': True},         # IPMI external bmc requires chardev attribute
    {'device': 'isa-debugcon', 'expected': True},            # Can't create serial device, empty char device
    {'device': 'isa-ipmi-bt', 'expected': True},             # IPMI device requires a bmc attribute to be set
    {'device': 'isa-ipmi-kcs', 'expected': True},            # IPMI device requires a bmc attribute to be set
    {'device': 'isa-parallel', 'expected': True},            # Can't create serial device, empty char device
    {'device': 'ivshmem-doorbell', 'expected': True},        # You must specify a 'chardev'
    {'device': 'ivshmem-plain', 'expected': True},           # You must specify a 'memdev'
    {'device': 'loader', 'expected': True},                  # please include valid arguments
    {'device': 'nand', 'expected': True},                    # Unsupported NAND block size 0x1
    {'device': 'nvdimm', 'expected': True},                  # 'memdev' property is not set
    {'device': 'nvme', 'expected': True},                    # Device initialization failed
    {'device': 'pc-dimm', 'expected': True},                 # 'memdev' property is not set
    {'device': 'pci-bridge', 'expected': True},              # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device': 'pci-bridge-seat', 'expected': True},         # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device': 'pxb', 'expected': True},                     # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device': 'scsi-block', 'expected': True},              # drive property not set
    {'device': 'scsi-disk', 'expected': True},               # drive property not set
    {'device': 'scsi-generic', 'expected': True},            # drive property not set
    {'device': 'scsi-hd', 'expected': True},                 # drive property not set
    {'device': 'spapr-pci-host-bridge', 'expected': True},   # BUID not specified for PHB
    {'device': 'spapr-rng', 'expected': True},               # spapr-rng needs an RNG backend!
    {'device': 'spapr-vty', 'expected': True},               # chardev property not set
    {'device': 'tpm-tis', 'expected': True},                 # tpm_tis: backend driver with id (null) could not be found
    {'device': 'unimplemented-device', 'expected': True},    # property 'size' not specified or zero
    {'device': 'usb-braille', 'expected': True},             # Property chardev is required
    {'device': 'usb-mtp', 'expected': True},                 # rootdir property must be configured
    {'device': 'usb-redir', 'expected': True},               # Parameter 'chardev' is missing
    {'device': 'usb-serial', 'expected': True},              # Property chardev is required
    {'device': 'usb-storage', 'expected': True},             # drive property not set
    {'device': 'vfio-amd-xgbe', 'expected': True},           # -device vfio-amd-xgbe: vfio error: wrong host device name
    {'device': 'vfio-calxeda-xgmac', 'expected': True},      # -device vfio-calxeda-xgmac: vfio error: wrong host device name
    {'device': 'vfio-pci', 'expected': True},                # No provided host device
    {'device': 'vfio-pci-igd-lpc-bridge', 'expected': True}, # VFIO dummy ISA/LPC bridge must have address 1f.0
    {'device': 'vhost-scsi.*', 'expected': True},            # vhost-scsi: missing wwpn
    {'device': 'vhost-vsock-device', 'expected': True},      # guest-cid property must be greater than 2
    {'device': 'vhost-vsock-pci', 'expected': True},         # guest-cid property must be greater than 2
    {'device': 'virtio-9p-ccw', 'expected': True},           # 9pfs device couldn't find fsdev with the id = NULL
    {'device': 'virtio-9p-device', 'expected': True},        # 9pfs device couldn't find fsdev with the id = NULL
    {'device': 'virtio-9p-pci', 'expected': True},           # 9pfs device couldn't find fsdev with the id = NULL
    {'device': 'virtio-blk-ccw', 'expected': True},          # drive property not set
    {'device': 'virtio-blk-device', 'expected': True},       # drive property not set
    {'device': 'virtio-blk-pci', 'expected': True},          # drive property not set
    {'device': 'virtio-crypto-ccw', 'expected': True},       # 'cryptodev' parameter expects a valid object
    {'device': 'virtio-crypto-device', 'expected': True},    # 'cryptodev' parameter expects a valid object
    {'device': 'virtio-crypto-pci', 'expected': True},       # 'cryptodev' parameter expects a valid object
    {'device': 'virtio-input-host-device', 'expected': True}, # evdev property is required
    {'device': 'virtio-input-host-pci', 'expected': True},   # evdev property is required
    {'device': 'xen-pvdevice', 'expected': True},            # Device ID invalid, it must always be supplied
    {'device': 'vhost-vsock-ccw', 'expected': True},         # guest-cid property must be greater than 2
    {'device': 'zpci', 'expected': True},                    # target must be defined
    {'device': 'pnv-(occ|icp|lpc)', 'expected': True},       # required link 'xics' not found: Property '.xics' not found
    {'device': 'powernv-cpu-.*', 'expected': True},          # pnv_core_realize: required link 'xics' not found: Property '.xics' not found

    # ioapic devices are already created by pc and will fail:
    {'machine': 'q35|pc.*', 'device': 'kvm-ioapic', 'expected': True}, # Only 1 ioapics allowed
    {'machine': 'q35|pc.*', 'device': 'ioapic', 'expected': True},     # Only 1 ioapics allowed

    # "spapr-cpu-core needs a pseries machine"
    {'machine': '(?!pseries).*', 'device': '.*-spapr-cpu-core', 'expected': True},

    # KVM-specific devices shouldn't be tried without accel=kvm:
    {'accel': '(?!kvm).*', 'device': 'kvmclock', 'expected': True},

    # xen-specific machines and devices:
    {'accel': '(?!xen).*', 'machine': 'xen.*', 'expected': True},
    {'accel': '(?!xen).*', 'device': 'xen-.*', 'expected': True},

    # this fails on some machine-types, but not all, so they don't have expected=True:
    {'device': 'vmgenid'}, # vmgenid requires DMA write support in fw_cfg, which this machine type does not provide

    # Silence INFO messages for errors that are common on multiple
    # devices/machines:
    {'log': r"No '[\w-]+' bus found for device '[\w-]+'"},
    {'log': r"images* must be given with the 'pflash' parameter"},
    {'log': r"(Guest|ROM|Flash|Kernel) image must be specified"},
    {'log': r"[cC]ould not load [\w ]+ (BIOS|bios) '[\w-]+\.bin'"},
    {'log': r"Couldn't find rom image '[\w-]+\.bin'"},
    {'log': r"speed mismatch trying to attach usb device"},
    {'log': r"Can't create a second ISA bus"},
    {'log': r"duplicate fw_cfg file name"},
    # sysbus-related error messages: most machines reject most dynamic sysbus devices:
    {'log': r"Option '-device [\w.,-]+' cannot be handled by this machine"},
    {'log': r"Device [\w.,-]+ is not supported by this machine yet"},
    {'log': r"Device [\w.,-]+ can not be dynamically instantiated"},
    {'log': r"Platform Bus: Can not fit MMIO region of size "},
    # other more specific errors we will ignore:
    {'device': '.*-spapr-cpu-core', 'log': r"CPU core type should be"},
    {'log': r"MSI(-X)? is not supported by interrupt controller"},
    {'log': r"pxb-pcie? devices cannot reside on a PCIe? bus"},
    {'log': r"Ignoring smp_cpus value"},
    {'log': r"sd_init failed: Drive 'sd0' is already in use because it has been automatically connected to another device"},
    {'log': r"This CPU requires a smaller page size than the system is using"},
    {'log': r"MSI-X support is mandatory in the S390 architecture"},
    {'log': r"rom check and register reset failed"},
    {'log': r"Unable to initialize GIC, CPUState for CPU#0 not valid"},
    {'log': r"Multiple VT220 operator consoles are not supported"},
    {'log': r"core 0 already populated"},
    {'log': r"could not find stage1 bootloader"},

    # other exitcode=1 failures not listed above will just generate INFO messages:
    {'exitcode': 1, 'loglevel': logging.INFO},

    # everything else (including SIGABRT and SIGSEGV) will be a fatal error:
    {'exitcode': None, 'fatal': True, 'loglevel': logging.FATAL},
]
186
187
def whitelistTestCaseMatch(wl, t):
    """Check if a test case specification can match a whitelist entry

    This only checks if a whitelist entry is a candidate match
    for a given test case, it won't check if the test case
    results/output match the entry.  See whitelistResultMatch().

    The 'machine', 'accel' and 'device' keys are compared, in that
    order.  A key that is missing from either the whitelist entry
    @wl or the test case @t never rules the entry out.  When the key
    is present on both sides, the whitelist regexp must match the
    whole test case value.

    Returns True if the entry is a candidate, False otherwise.
    """
    def keyMatches(key):
        if key not in wl or key not in t:
            return True
        # The pattern is wrapped in a non-capturing group so that the
        # end anchor applies to every top-level alternative.  Without
        # the group, 'q35|pc.*' + '$' would only anchor the 'pc.*'
        # branch and 'q35' would be accepted as a mere prefix.
        return re.match('(?:%s)$' % wl[key], t[key]) is not None

    return all(keyMatches(key) for key in ('machine', 'accel', 'device'))
204
205
def whitelistCandidates(t):
    """Yield the whitelist entries that can apply to test case @t

    Each item is an (index, entry) tuple, where index is the position
    of the entry in ERROR_WHITELIST.  Entries are produced in list
    order.
    """
    for candidate in enumerate(ERROR_WHITELIST):
        if not whitelistTestCaseMatch(candidate[1], t):
            continue
        yield candidate
211
212
def findExpectedResult(t):
    """Look for an expected=True whitelist entry covering a test case

    Returns the first matching (i, wl) tuple, where i is the index in
    ERROR_WHITELIST and wl is the whitelist entry itself, or None if
    no candidate entry has a true 'expected' value.
    """
    expected = (candidate for candidate in whitelistCandidates(t)
                if candidate[1].get('expected'))
    return next(expected, None)
222
223
def whitelistResultMatch(wl, r):
    """Check if the results/output of a test case match a whitelist entry

    Must only be called for entries where whitelistTestCaseMatch()
    is True (e.g. entries returned by whitelistCandidates()).

    The exit code is compared against the entry's 'exitcode' (default
    1, None matches anything), then the entry's 'log' regexp, if any,
    is searched in the QEMU log.  The result is only meant to be used
    as a truth value.
    """
    assert whitelistTestCaseMatch(wl, r['testcase'])

    wanted_exitcode = wl.get('exitcode', 1)
    if wanted_exitcode is not None and not r['exitcode'] == wanted_exitcode:
        return False

    if 'log' not in wl:
        return True
    return re.search(wl['log'], r['log'], re.MULTILINE)
236
237
def checkResultWhitelist(r):
    """Find the whitelist entry that applies to a test case result

    Returns (i, wl) tuple, where i is the index in
    ERROR_WHITELIST and wl is the whitelist entry itself.

    The first candidate entry whose exit code/log conditions match
    wins.  The catch-all entries at the end of ERROR_WHITELIST are
    supposed to guarantee that there is always a match.
    """
    candidates = whitelistCandidates(r['testcase'])
    found = next((c for c in candidates if whitelistResultMatch(c[1], r)), None)
    if found is None:
        raise Exception("this should never happen")
    return found
249
250
def qemuOptsEscape(s):
    """Escape an option value for use in a QemuOpts string

    Every comma in @s is doubled.
    """
    pieces = s.split(",")
    return ",,".join(pieces)
254
255
def formatTestCase(t):
    """Render a test case dict as "key=value key=value" for log messages"""
    fields = []
    for key, value in t.items():
        fields.append('%s=%s' % (key, value))
    return ' '.join(fields)
259
260
def qomListTypeNames(vm, **kwargs):
    """Return the type names reported by the qom-list-types QMP command

    Keyword arguments are passed through as arguments of the QMP
    command.
    """
    names = []
    for typeinfo in vm.command('qom-list-types', **kwargs):
        names.append(typeinfo['name'])
    return names
265
266
def infoQDM(vm):
    """Parse 'info qdm' output

    Runs the 'info qdm' monitor command through the
    'human-monitor-command' QMP command and yields one dictionary
    for each device line of the output, with the keys:
    * 'name': the quoted value of the 'name' field
    * 'no-user': True if the line contains ', no-user'

    Empty lines, lines ending with ':' and lines that don't contain
    a 'name "..."' field are skipped.
    """
    args = {'command-line': 'info qdm'}
    devhelp = vm.command('human-monitor-command', **args)
    for line in devhelp.split('\n'):
        line = line.strip()
        if line == '' or line.endswith(':'):
            continue
        name = re.search(r'name "([^"]+)"', line)
        if name is None:
            # Not a device line: skip it instead of crashing with
            # AttributeError on the missing match object
            continue
        yield {'name': name.group(1),
               'no-user': ', no-user' in line}
278
279
class QemuBinaryInfo(object):
    """Information queried from a QEMU binary

    Attributes set by the constructor:
    * binary: the QEMU binary the information was queried from
    * alldevs: set of non-abstract type names implementing devtype
    * no_user_devs: set of device names flagged 'no-user' by 'info qdm'
    * user_devs: alldevs without the entries of no_user_devs
    * machines: list of machine-type names from query-machines
    * kvm_available: the 'enabled' field of the query-kvm reply
    """
    def __init__(self, binary, devtype):
        """Launch @binary with '-machine none' and query it

        @devtype is the QOM type devices must implement to be listed;
        None means 'device'.
        """
        if devtype is None:
            devtype = 'device'

        self.binary = binary
        # cache for machineInfo(): machine name -> info dictionary
        self._machine_info = {}

        dbg("devtype: %r", devtype)
        args = ['-S', '-machine', 'none,accel=kvm:tcg']
        dbg("querying info for QEMU binary: %s", binary)
        vm = QEMUMachine(binary=binary, args=args)
        try:
            # launch() is inside the try block so the VM is also shut
            # down if launching fails halfway, like checkOneCase() does
            vm.launch()
            self.alldevs = set(qomListTypeNames(vm, implements=devtype, abstract=False))
            # there's no way to query DeviceClass::user_creatable using QMP,
            # so use 'info qdm':
            self.no_user_devs = set(d['name'] for d in infoQDM(vm) if d['no-user'])
            self.machines = list(m['name'] for m in vm.command('query-machines'))
            self.user_devs = self.alldevs.difference(self.no_user_devs)
            self.kvm_available = vm.command('query-kvm')['enabled']
        finally:
            vm.shutdown()

    def machineInfo(self, machine):
        """Query for information on a specific machine-type

        Results are cached internally, in case the same machine-
        type is queried multiple times.

        Returns a dictionary with a single key, 'runnable', which is
        True if QEMU could be launched with "-machine @machine" and
        False if launching raised an exception.
        """
        if machine in self._machine_info:
            return self._machine_info[machine]

        mi = {}
        args = ['-S', '-machine', '%s' % (machine)]
        dbg("querying machine info for binary=%s machine=%s", self.binary, machine)
        vm = QEMUMachine(binary=self.binary, args=args)
        try:
            vm.launch()
            mi['runnable'] = True
        except Exception:
            # KeyboardInterrupt is not an Exception subclass, so it
            # still propagates to the caller
            dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=True)
            dbg("log: %r", vm.get_log())
            mi['runnable'] = False
        finally:
            # shut down even when interrupted, so no QEMU process is
            # left behind
            vm.shutdown()

        self._machine_info[machine] = mi
        return mi
330
331
# Cache of QemuBinaryInfo objects, indexed by QEMU binary
BINARY_INFO = {}


def getBinaryInfo(args, binary):
    """Return the QemuBinaryInfo object for @binary

    The object is created on the first request, using args.devtype,
    and reused on later requests for the same binary.
    """
    try:
        return BINARY_INFO[binary]
    except KeyError:
        pass
    info = QemuBinaryInfo(binary, args.devtype)
    BINARY_INFO[binary] = info
    return BINARY_INFO[binary]
339
340
def checkOneCase(args, testcase):
    """Check one specific case

    Launches QEMU with "-S -machine MACHINE,accel=ACCEL -device DEVICE"
    built from the 'binary', 'accel', 'machine' and 'device' keys of
    @testcase, then shuts it down.  @args is currently unused.

    Returns a dictionary containing failure information on error,
    or None on success.  The dictionary keys are:
    * 'exc_traceback': formatted traceback if launching raised an
      exception, None otherwise
    * 'exitcode': value returned by vm.exitcode() after shutdown
    * 'log': QEMU log output
    * 'testcase': the @testcase dictionary
    * 'cmdline': the command line, for logging purposes
    """
    binary = testcase['binary']
    accel = testcase['accel']
    machine = testcase['machine']
    device = testcase['device']

    dbg("will test: %r", testcase)

    # named qemu_args so the 'args' parameter isn't shadowed
    qemu_args = ['-S', '-machine', '%s,accel=%s' % (machine, accel),
                 '-device', qemuOptsEscape(device)]
    cmdline = ' '.join([binary] + qemu_args)
    dbg("will launch QEMU: %s", cmdline)
    vm = QEMUMachine(binary=binary, args=qemu_args)

    exc_traceback = None
    try:
        vm.launch()
    except Exception:
        # KeyboardInterrupt is not an Exception subclass, so it still
        # propagates to the caller after the finally block has run
        exc_traceback = traceback.format_exc()
        dbg("Exception while running test case")
    finally:
        vm.shutdown()
        ec = vm.exitcode()
        log = vm.get_log()

    if exc_traceback is None and ec == 0:
        return None

    return {'exc_traceback': exc_traceback,
            'exitcode': ec,
            'log': log,
            'testcase': testcase,
            'cmdline': cmdline}
379
380
def binariesToTest(args, testcase):
    """Return the list of QEMU binaries to be tested

    Binaries given on the command line (args.qemu) take precedence;
    without them, ./*-softmmu/qemu-system-* is globbed.  @testcase
    is not used.
    """
    return args.qemu or glob.glob('./*-softmmu/qemu-system-*')
387
388
def accelsToTest(args, testcase):
    """Yield the accelerators to test for the binary of @testcase

    'kvm' comes first and is only included if the binary reports KVM
    as available; 'tcg' is always included.
    """
    info = getBinaryInfo(args, testcase['binary'])
    accels = ('kvm', 'tcg') if info.kvm_available else ('tcg',)
    for accel in accels:
        yield accel
393
394
def machinesToTest(args, testcase):
    """Return the machine-types known by the binary of @testcase"""
    info = getBinaryInfo(args, testcase['binary'])
    return info.machines
397
398
def devicesToTest(args, testcase):
    """Return the user-creatable devices of the binary of @testcase"""
    info = getBinaryInfo(args, testcase['binary'])
    return info.user_devs
401
402
# Test case variables, as (name, generator function) tuples.
# genCases() expands them in this order, so a generator function may
# rely on the variables listed before its own (e.g. accelsToTest()
# reads testcase['binary']).
TESTCASE_VARIABLES = [
    ('binary', binariesToTest),
    ('accel', accelsToTest),
    ('machine', machinesToTest),
    ('device', devicesToTest),
]
409
410
def genCases1(args, testcases, var, fn):
    """Expand a sequence of test cases over a single variable

    A test case that already has @var set is passed through (as a
    copy) without calling @fn; this is what lets the "-t"
    command-line option pin a variable to a specific value.
    Otherwise one copy of the test case is produced for each value
    returned by fn(args, testcase), with @var set to that value.
    """
    for base in testcases:
        if var in base:
            yield base.copy()
            continue
        for value in fn(args, base):
            expanded = base.copy()
            expanded[var] = value
            yield expanded
427
428
def genCases(args, testcase):
    """Generate test cases for all variables

    Starts from a copy of @testcase and chains one genCases1() step
    per entry of TESTCASE_VARIABLES, in order.
    """
    expanded = [testcase.copy()]
    for entry in TESTCASE_VARIABLES:
        var, fn = entry
        dbg("var: %r, fn: %r", var, fn)
        expanded = genCases1(args, expanded, var, fn)
    return expanded
437
438
def casesToTest(args, testcase):
    """Return the test cases to run, honouring the command-line options

    * args.random: keep a random sample of at most that many cases
    * args.debug: log the number of test cases
    * args.shuffle: randomize the order of the cases

    Each of these options turns the result into a list.
    """
    selected = genCases(args, testcase)
    if args.random:
        population = list(selected)
        sample_size = min(args.random, len(population))
        selected = random.sample(population, sample_size)
    if args.debug:
        selected = list(selected)
        dbg("%d test cases to test", len(selected))
    if args.shuffle:
        selected = list(selected)
        random.shuffle(selected)
    return selected
451
452
def logFailure(f, level):
    """Log the details of failure @f at log level @level

    @f is a failure dictionary as returned by checkOneCase(): the
    test case, command line, QEMU log (one message per line), exit
    code and, if present, the exception traceback are logged.
    """
    def emit(fmt, *fmtargs):
        logger.log(level, fmt, *fmtargs)

    emit("failed: %s", formatTestCase(f['testcase']))
    emit("cmdline: %s", f['cmdline'])
    for logline in f['log'].strip().split('\n'):
        emit("log: %s", logline)
    emit("exit code: %r", f['exitcode'])

    if not f['exc_traceback']:
        return
    emit("exception:")
    for tbline in f['exc_traceback'].split('\n'):
        emit("  %s", tbline.rstrip('\n'))
464
465
def main():
    """Command-line entry point

    Parses the command line, generates the test cases and runs them,
    logging every failure according to the matching ERROR_WHITELIST
    entry.

    Returns 1 if no QEMU binary was found or if any fatal failure was
    detected, 0 otherwise.  The value is meant to be passed to
    sys.exit().
    """
    parser = argparse.ArgumentParser(description="QEMU -device crash test")
    parser.add_argument('-t', metavar='KEY=VALUE', nargs='*',
                        help="Limit test cases to KEY=VALUE",
                        action='append', dest='testcases', default=[])
    parser.add_argument('-d', '--debug', action='store_true',
                        help='debug output')
    parser.add_argument('-v', '--verbose', action='store_true', default=True,
                        help='verbose output')
    parser.add_argument('-q', '--quiet', dest='verbose', action='store_false',
                        help='non-verbose output')
    parser.add_argument('-r', '--random', type=int, metavar='COUNT',
                        help='run a random sample of COUNT test cases',
                        default=0)
    parser.add_argument('--shuffle', action='store_true',
                        help='Run test cases in random order')
    parser.add_argument('--dry-run', action='store_true',
                        help="Don't run any tests, just generate list")
    parser.add_argument('-D', '--devtype', metavar='TYPE',
                        help="Test only device types that implement TYPE")
    parser.add_argument('-Q', '--quick', action='store_true', default=True,
                        help="Quick mode: skip test cases that are expected to fail")
    parser.add_argument('-F', '--full', action='store_false', dest='quick',
                        help="Full mode: test cases that are expected to fail")
    parser.add_argument('--strict', action='store_true', dest='strict',
                        help="Treat all warnings as fatal")
    parser.add_argument('qemu', nargs='*', metavar='QEMU',
                        help='QEMU binary to run')
    args = parser.parse_args()

    if args.debug:
        lvl = logging.DEBUG
    elif args.verbose:
        lvl = logging.INFO
    else:
        lvl = logging.WARN
    logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s')

    fatal_failures = []
    # whitelist entry index -> list of failures that matched the entry
    wl_stats = {}
    skipped = 0
    total = 0

    # variables fixed by the user through "-t KEY=VALUE"
    tc = {}
    dbg("testcases: %r", args.testcases)
    if args.testcases:
        # '-t' uses nargs='*' together with action='append', so
        # args.testcases is a list of lists that has to be flattened
        for t in chain(*args.testcases):
            for kv in t.split():
                if '=' not in kv:
                    # report a usage error instead of crashing with a
                    # ValueError traceback on the unpacking below
                    parser.error("invalid test case specification %r, "
                                 "expected KEY=VALUE" % (kv,))
                k, v = kv.split('=', 1)
                tc[k] = v

    if not binariesToTest(args, tc):
        print("No QEMU binary found", file=sys.stderr)
        parser.print_usage(sys.stderr)
        return 1

    for t in casesToTest(args, tc):
        logger.info("running test case: %s", formatTestCase(t))
        total += 1

        expected_match = findExpectedResult(t)
        if (args.quick and
                (expected_match or
                 not getBinaryInfo(args, t['binary']).machineInfo(t['machine'])['runnable'])):
            dbg("skipped: %s", formatTestCase(t))
            skipped += 1
            continue

        if args.dry_run:
            continue

        try:
            f = checkOneCase(args, t)
        except KeyboardInterrupt:
            # stop testing, but still report what was found so far
            break

        if f:
            i, wl = checkResultWhitelist(f)
            dbg("testcase: %r, whitelist match: %r", t, wl)
            wl_stats.setdefault(i, []).append(f)
            level = wl.get('loglevel', logging.DEBUG)
            logFailure(f, level)
            if wl.get('fatal') or (args.strict and level >= logging.WARN):
                fatal_failures.append(f)
        else:
            dbg("success: %s", formatTestCase(t))
            if expected_match:
                # Logger.warn() is a deprecated alias that newer
                # Python versions no longer provide
                logger.warning("Didn't fail as expected: %s", formatTestCase(t))

    logger.info("Total: %d test cases", total)
    if skipped:
        logger.info("Skipped %d test cases", skipped)

    if args.debug:
        stats = sorted([(len(wl_stats.get(i, [])), wl) for i, wl in
                        enumerate(ERROR_WHITELIST)], key=lambda x: x[0])
        for count, wl in stats:
            dbg("whitelist entry stats: %d: %r", count, wl)

    if fatal_failures:
        for f in fatal_failures:
            t = f['testcase']
            logger.error("Fatal failure: %s", formatTestCase(t))
        logger.error("Fatal failures on some machine/device combinations")
        return 1

    return 0
571
# Run only when executed as a script; the return value of main()
# becomes the exit status of the process.
if __name__ == '__main__':
    sys.exit(main())
574