xref: /openbmc/qemu/scripts/device-crash-test (revision 9921e3d3)
1#!/usr/bin/env python3
2#
3#  Copyright (c) 2017 Red Hat Inc
4#
5# Author:
6#  Eduardo Habkost <ehabkost@redhat.com>
7#
8# This program is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License along
19# with this program; if not, write to the Free Software Foundation, Inc.,
20# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21
22"""
23Run QEMU with all combinations of -machine and -device types,
24check for crashes and unexpected errors.
25"""
26
27import os
28import sys
29import glob
30import logging
31import traceback
32import re
33import random
34import argparse
35from itertools import chain
36
37sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))
38from qemu.machine import QEMUMachine
39
40logger = logging.getLogger('device-crash-test')
41dbg = logger.debug
42
43
44# Purposes of the following whitelist:
45# * Avoiding verbose log messages when we find known non-fatal
46#   (exitcode=1) errors
47# * Avoiding fatal errors when we find known crashes
48# * Skipping machines/devices that are known not to work out of
49#   the box, when running in --quick mode
50#
51# Keeping the whitelist updated is desirable, but not required,
52# because unexpected cases where QEMU exits with exitcode=1 will
53# just trigger a INFO message.
54
55# Valid whitelist entry keys:
56# * accel: regexp, full match only
57# * machine: regexp, full match only
58# * device: regexp, full match only
59# * log: regexp, partial match allowed
60# * exitcode: if not present, defaults to 1. If None, matches any exitcode
61# * warn: if True, matching failures will be logged as warnings
62# * expected: if True, QEMU is expected to always fail every time
63#   when testing the corresponding test case
64# * loglevel: log level of log output when there's a match.
65ERROR_WHITELIST = [
66    # Machines that won't work out of the box:
67    #             MACHINE                         | ERROR MESSAGE
68    {'machine':'niagara', 'expected':True},       # Unable to load a firmware for -M niagara
69    {'machine':'boston', 'expected':True},        # Please provide either a -kernel or -bios argument
70    {'machine':'leon3_generic', 'expected':True}, # Can't read bios image (null)
71
72    # devices that don't work out of the box because they require extra options to "-device DEV":
73    #            DEVICE                                    | ERROR MESSAGE
74    {'device':'.*-(i386|x86_64)-cpu', 'expected':True},    # CPU socket-id is not set
75    {'device':'icp', 'expected':True},                     # icp_realize: required link 'xics' not found: Property '.xics' not found
76    {'device':'ics', 'expected':True},                     # ics_base_realize: required link 'xics' not found: Property '.xics' not found
77    # "-device ide-cd" does work on more recent QEMU versions, so it doesn't have expected=True
78    {'device':'ide-cd'},                                 # No drive specified
79    {'device':'ide-drive', 'expected':True},               # No drive specified
80    {'device':'ide-hd', 'expected':True},                  # No drive specified
81    {'device':'ipmi-bmc-extern', 'expected':True},         # IPMI external bmc requires chardev attribute
82    {'device':'isa-debugcon', 'expected':True},            # Can't create serial device, empty char device
83    {'device':'isa-ipmi-bt', 'expected':True},             # IPMI device requires a bmc attribute to be set
84    {'device':'isa-ipmi-kcs', 'expected':True},            # IPMI device requires a bmc attribute to be set
85    {'device':'isa-parallel', 'expected':True},            # Can't create serial device, empty char device
86    {'device':'ivshmem-doorbell', 'expected':True},        # You must specify a 'chardev'
87    {'device':'ivshmem-plain', 'expected':True},           # You must specify a 'memdev'
88    {'device':'loader', 'expected':True},                  # please include valid arguments
89    {'device':'nand', 'expected':True},                    # Unsupported NAND block size 0x1
90    {'device':'nvdimm', 'expected':True},                  # 'memdev' property is not set
91    {'device':'nvme', 'expected':True},                    # Device initialization failed
92    {'device':'pc-dimm', 'expected':True},                 # 'memdev' property is not set
93    {'device':'pci-bridge', 'expected':True},              # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
94    {'device':'pci-bridge-seat', 'expected':True},         # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
95    {'device':'pxb', 'expected':True},                     # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
96    {'device':'scsi-block', 'expected':True},              # drive property not set
97    {'device':'scsi-disk', 'expected':True},               # drive property not set
98    {'device':'scsi-generic', 'expected':True},            # drive property not set
99    {'device':'scsi-hd', 'expected':True},                 # drive property not set
100    {'device':'spapr-pci-host-bridge', 'expected':True},   # BUID not specified for PHB
101    {'device':'spapr-rng', 'expected':True},               # spapr-rng needs an RNG backend!
102    {'device':'spapr-vty', 'expected':True},               # chardev property not set
103    {'device':'tpm-tis', 'expected':True},                 # tpm_tis: backend driver with id (null) could not be found
104    {'device':'unimplemented-device', 'expected':True},    # property 'size' not specified or zero
105    {'device':'usb-braille', 'expected':True},             # Property chardev is required
106    {'device':'usb-mtp', 'expected':True},                 # rootdir property must be configured
107    {'device':'usb-redir', 'expected':True},               # Parameter 'chardev' is missing
108    {'device':'usb-serial', 'expected':True},              # Property chardev is required
109    {'device':'usb-storage', 'expected':True},             # drive property not set
110    {'device':'vfio-amd-xgbe', 'expected':True},           # -device vfio-amd-xgbe: vfio error: wrong host device name
111    {'device':'vfio-calxeda-xgmac', 'expected':True},      # -device vfio-calxeda-xgmac: vfio error: wrong host device name
112    {'device':'vfio-pci', 'expected':True},                # No provided host device
113    {'device':'vfio-pci-igd-lpc-bridge', 'expected':True}, # VFIO dummy ISA/LPC bridge must have address 1f.0
114    {'device':'vhost-scsi.*', 'expected':True},            # vhost-scsi: missing wwpn
115    {'device':'vhost-vsock-device', 'expected':True},      # guest-cid property must be greater than 2
116    {'device':'vhost-vsock-pci', 'expected':True},         # guest-cid property must be greater than 2
117    {'device':'virtio-9p-ccw', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
118    {'device':'virtio-9p-device', 'expected':True},        # 9pfs device couldn't find fsdev with the id = NULL
119    {'device':'virtio-9p-pci', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
120    {'device':'virtio-blk-ccw', 'expected':True},          # drive property not set
121    {'device':'virtio-blk-device', 'expected':True},       # drive property not set
122    {'device':'virtio-blk-device', 'expected':True},       # drive property not set
123    {'device':'virtio-blk-pci', 'expected':True},          # drive property not set
124    {'device':'virtio-crypto-ccw', 'expected':True},       # 'cryptodev' parameter expects a valid object
125    {'device':'virtio-crypto-device', 'expected':True},    # 'cryptodev' parameter expects a valid object
126    {'device':'virtio-crypto-pci', 'expected':True},       # 'cryptodev' parameter expects a valid object
127    {'device':'virtio-input-host-device', 'expected':True}, # evdev property is required
128    {'device':'virtio-input-host-pci', 'expected':True},   # evdev property is required
129    {'device':'xen-pvdevice', 'expected':True},            # Device ID invalid, it must always be supplied
130    {'device':'vhost-vsock-ccw', 'expected':True},         # guest-cid property must be greater than 2
131    {'device':'zpci', 'expected':True},                    # target must be defined
132    {'device':'pnv-(occ|icp|lpc)', 'expected':True},       # required link 'xics' not found: Property '.xics' not found
133    {'device':'powernv-cpu-.*', 'expected':True},          # pnv_core_realize: required link 'xics' not found: Property '.xics' not found
134
135    # ioapic devices are already created by pc and will fail:
136    {'machine':'q35|pc.*', 'device':'kvm-ioapic', 'expected':True}, # Only 1 ioapics allowed
137    {'machine':'q35|pc.*', 'device':'ioapic', 'expected':True},     # Only 1 ioapics allowed
138
139    # "spapr-cpu-core needs a pseries machine"
140    {'machine':'(?!pseries).*', 'device':'.*-spapr-cpu-core', 'expected':True},
141
142    # KVM-specific devices shouldn't be tried without accel=kvm:
143    {'accel':'(?!kvm).*', 'device':'kvmclock', 'expected':True},
144
145    # xen-specific machines and devices:
146    {'accel':'(?!xen).*', 'machine':'xen.*', 'expected':True},
147    {'accel':'(?!xen).*', 'device':'xen-.*', 'expected':True},
148
149    # this fails on some machine-types, but not all, so they don't have expected=True:
150    {'device':'vmgenid'}, # vmgenid requires DMA write support in fw_cfg, which this machine type does not provide
151
152    # Silence INFO messages for errors that are common on multiple
153    # devices/machines:
154    {'log':r"No '[\w-]+' bus found for device '[\w-]+'"},
155    {'log':r"images* must be given with the 'pflash' parameter"},
156    {'log':r"(Guest|ROM|Flash|Kernel) image must be specified"},
157    {'log':r"[cC]ould not load [\w ]+ (BIOS|bios) '[\w-]+\.bin'"},
158    {'log':r"Couldn't find rom image '[\w-]+\.bin'"},
159    {'log':r"speed mismatch trying to attach usb device"},
160    {'log':r"Can't create a second ISA bus"},
161    {'log':r"duplicate fw_cfg file name"},
162    # sysbus-related error messages: most machines reject most dynamic sysbus devices:
163    {'log':r"Option '-device [\w.,-]+' cannot be handled by this machine"},
164    {'log':r"Device [\w.,-]+ is not supported by this machine yet"},
165    {'log':r"Device [\w.,-]+ can not be dynamically instantiated"},
166    {'log':r"Platform Bus: Can not fit MMIO region of size "},
167    # other more specific errors we will ignore:
168    {'device':'.*-spapr-cpu-core', 'log':r"CPU core type should be"},
169    {'log':r"MSI(-X)? is not supported by interrupt controller"},
170    {'log':r"pxb-pcie? devices cannot reside on a PCIe? bus"},
171    {'log':r"Ignoring smp_cpus value"},
172    {'log':r"sd_init failed: Drive 'sd0' is already in use because it has been automatically connected to another device"},
173    {'log':r"This CPU requires a smaller page size than the system is using"},
174    {'log':r"MSI-X support is mandatory in the S390 architecture"},
175    {'log':r"rom check and register reset failed"},
176    {'log':r"Unable to initialize GIC, CPUState for CPU#0 not valid"},
177    {'log':r"Multiple VT220 operator consoles are not supported"},
178    {'log':r"core 0 already populated"},
179    {'log':r"could not find stage1 bootloader"},
180
181    # other exitcode=1 failures not listed above will just generate INFO messages:
182    {'exitcode':1, 'loglevel':logging.INFO},
183
184    # everything else (including SIGABRT and SIGSEGV) will be a fatal error:
185    {'exitcode':None, 'fatal':True, 'loglevel':logging.FATAL},
186]
187
188
189def whitelistTestCaseMatch(wl, t):
190    """Check if a test case specification can match a whitelist entry
191
192    This only checks if a whitelist entry is a candidate match
193    for a given test case, it won't check if the test case
194    results/output match the entry.  See whitelistResultMatch().
195    """
196    return (('machine' not in wl or
197             'machine' not in t or
198             re.match(wl['machine'] + '$', t['machine'])) and
199            ('accel' not in wl or
200             'accel' not in t or
201             re.match(wl['accel'] + '$', t['accel'])) and
202            ('device' not in wl or
203             'device' not in t or
204             re.match(wl['device'] + '$', t['device'])))
205
206
207def whitelistCandidates(t):
208    """Generate the list of candidates that can match a test case"""
209    for i, wl in enumerate(ERROR_WHITELIST):
210        if whitelistTestCaseMatch(wl, t):
211            yield (i, wl)
212
213
214def findExpectedResult(t):
215    """Check if there's an expected=True whitelist entry for a test case
216
217    Returns (i, wl) tuple, where i is the index in
218    ERROR_WHITELIST and wl is the whitelist entry itself.
219    """
220    for i, wl in whitelistCandidates(t):
221        if wl.get('expected'):
222            return (i, wl)
223
224
225def whitelistResultMatch(wl, r):
226    """Check if test case results/output match a whitelist entry
227
228    It is valid to call this function only if
229    whitelistTestCaseMatch() is True for the entry (e.g. on
230    entries returned by whitelistCandidates())
231    """
232    assert whitelistTestCaseMatch(wl, r['testcase'])
233    return ((wl.get('exitcode', 1) is None or
234             r['exitcode'] == wl.get('exitcode', 1)) and
235            ('log' not in wl or
236             re.search(wl['log'], r['log'], re.MULTILINE)))
237
238
239def checkResultWhitelist(r):
240    """Look up whitelist entry for a given test case result
241
242    Returns (i, wl) tuple, where i is the index in
243    ERROR_WHITELIST and wl is the whitelist entry itself.
244    """
245    for i, wl in whitelistCandidates(r['testcase']):
246        if whitelistResultMatch(wl, r):
247            return i, wl
248
249    raise Exception("this should never happen")
250
251
252def qemuOptsEscape(s):
253    """Escape option value QemuOpts"""
254    return s.replace(",", ",,")
255
256
257def formatTestCase(t):
258    """Format test case info as "key=value key=value" for prettier logging output"""
259    return ' '.join('%s=%s' % (k, v) for k, v in t.items())
260
261
262def qomListTypeNames(vm, **kwargs):
263    """Run qom-list-types QMP command, return type names"""
264    types = vm.command('qom-list-types', **kwargs)
265    return [t['name'] for t in types]
266
267
268def infoQDM(vm):
269    """Parse 'info qdm' output"""
270    args = {'command-line': 'info qdm'}
271    devhelp = vm.command('human-monitor-command', **args)
272    for l in devhelp.split('\n'):
273        l = l.strip()
274        if l == '' or l.endswith(':'):
275            continue
276        d = {'name': re.search(r'name "([^"]+)"', l).group(1),
277             'no-user': (re.search(', no-user', l) is not None)}
278        yield d
279
280
281class QemuBinaryInfo(object):
282    def __init__(self, binary, devtype):
283        if devtype is None:
284            devtype = 'device'
285
286        self.binary = binary
287        self._machine_info = {}
288
289        dbg("devtype: %r", devtype)
290        args = ['-S', '-machine', 'none,accel=kvm:tcg']
291        dbg("querying info for QEMU binary: %s", binary)
292        vm = QEMUMachine(binary=binary, args=args)
293        vm.launch()
294        try:
295            self.alldevs = set(qomListTypeNames(vm, implements=devtype, abstract=False))
296            # there's no way to query DeviceClass::user_creatable using QMP,
297            # so use 'info qdm':
298            self.no_user_devs = set([d['name'] for d in infoQDM(vm, ) if d['no-user']])
299            self.machines = list(m['name'] for m in vm.command('query-machines'))
300            self.user_devs = self.alldevs.difference(self.no_user_devs)
301            self.kvm_available = vm.command('query-kvm')['enabled']
302        finally:
303            vm.shutdown()
304
305    def machineInfo(self, machine):
306        """Query for information on a specific machine-type
307
308        Results are cached internally, in case the same machine-
309        type is queried multiple times.
310        """
311        if machine in self._machine_info:
312            return self._machine_info[machine]
313
314        mi = {}
315        args = ['-S', '-machine', '%s' % (machine)]
316        dbg("querying machine info for binary=%s machine=%s", self.binary, machine)
317        vm = QEMUMachine(binary=self.binary, args=args)
318        try:
319            vm.launch()
320            mi['runnable'] = True
321        except KeyboardInterrupt:
322            raise
323        except:
324            dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=sys.exc_info())
325            dbg("log: %r", vm.get_log())
326            mi['runnable'] = False
327
328        vm.shutdown()
329        self._machine_info[machine] = mi
330        return mi
331
332
333BINARY_INFO = {}
334
335
336def getBinaryInfo(args, binary):
337    if binary not in BINARY_INFO:
338        BINARY_INFO[binary] = QemuBinaryInfo(binary, args.devtype)
339    return BINARY_INFO[binary]
340
341
342def checkOneCase(args, testcase):
343    """Check one specific case
344
345    Returns a dictionary containing failure information on error,
346    or None on success
347    """
348    binary = testcase['binary']
349    accel = testcase['accel']
350    machine = testcase['machine']
351    device = testcase['device']
352
353    dbg("will test: %r", testcase)
354
355    args = ['-S', '-machine', '%s,accel=%s' % (machine, accel),
356            '-device', qemuOptsEscape(device)]
357    cmdline = ' '.join([binary] + args)
358    dbg("will launch QEMU: %s", cmdline)
359    vm = QEMUMachine(binary=binary, args=args)
360
361    exc_traceback = None
362    try:
363        vm.launch()
364    except KeyboardInterrupt:
365        raise
366    except:
367        exc_traceback = traceback.format_exc()
368        dbg("Exception while running test case")
369    finally:
370        vm.shutdown()
371        ec = vm.exitcode()
372        log = vm.get_log()
373
374    if exc_traceback is not None or ec != 0:
375        return {'exc_traceback':exc_traceback,
376                'exitcode':ec,
377                'log':log,
378                'testcase':testcase,
379                'cmdline':cmdline}
380
381
382def binariesToTest(args, testcase):
383    if args.qemu:
384        r = args.qemu
385    else:
386        r = glob.glob('./qemu-system-*')
387    return r
388
389
390def accelsToTest(args, testcase):
391    if getBinaryInfo(args, testcase['binary']).kvm_available:
392        yield 'kvm'
393    yield 'tcg'
394
395
396def machinesToTest(args, testcase):
397    return getBinaryInfo(args, testcase['binary']).machines
398
399
400def devicesToTest(args, testcase):
401    return getBinaryInfo(args, testcase['binary']).user_devs
402
403
404TESTCASE_VARIABLES = [
405    ('binary', binariesToTest),
406    ('accel', accelsToTest),
407    ('machine', machinesToTest),
408    ('device', devicesToTest),
409]
410
411
412def genCases1(args, testcases, var, fn):
413    """Generate new testcases for one variable
414
415    If an existing item already has a variable set, don't
416    generate new items and just return it directly. This
417    allows the "-t" command-line option to be used to choose
418    a specific test case.
419    """
420    for testcase in testcases:
421        if var in testcase:
422            yield testcase.copy()
423        else:
424            for i in fn(args, testcase):
425                t = testcase.copy()
426                t[var] = i
427                yield t
428
429
430def genCases(args, testcase):
431    """Generate test cases for all variables
432    """
433    cases = [testcase.copy()]
434    for var, fn in TESTCASE_VARIABLES:
435        dbg("var: %r, fn: %r", var, fn)
436        cases = genCases1(args, cases, var, fn)
437    return cases
438
439
440def casesToTest(args, testcase):
441    cases = genCases(args, testcase)
442    if args.random:
443        cases = list(cases)
444        cases = random.sample(cases, min(args.random, len(cases)))
445    if args.debug:
446        cases = list(cases)
447        dbg("%d test cases to test", len(cases))
448    if args.shuffle:
449        cases = list(cases)
450        random.shuffle(cases)
451    return cases
452
453
454def logFailure(f, level):
455    t = f['testcase']
456    logger.log(level, "failed: %s", formatTestCase(t))
457    logger.log(level, "cmdline: %s", f['cmdline'])
458    for l in f['log'].strip().split('\n'):
459        logger.log(level, "log: %s", l)
460    logger.log(level, "exit code: %r", f['exitcode'])
461    if f['exc_traceback']:
462        logger.log(level, "exception:")
463        for l in f['exc_traceback'].split('\n'):
464            logger.log(level, "  %s", l.rstrip('\n'))
465
466
467def main():
468    parser = argparse.ArgumentParser(description="QEMU -device crash test")
469    parser.add_argument('-t', metavar='KEY=VALUE', nargs='*',
470                        help="Limit test cases to KEY=VALUE",
471                        action='append', dest='testcases', default=[])
472    parser.add_argument('-d', '--debug', action='store_true',
473                        help='debug output')
474    parser.add_argument('-v', '--verbose', action='store_true', default=True,
475                        help='verbose output')
476    parser.add_argument('-q', '--quiet', dest='verbose', action='store_false',
477                        help='non-verbose output')
478    parser.add_argument('-r', '--random', type=int, metavar='COUNT',
479                        help='run a random sample of COUNT test cases',
480                        default=0)
481    parser.add_argument('--shuffle', action='store_true',
482                        help='Run test cases in random order')
483    parser.add_argument('--dry-run', action='store_true',
484                        help="Don't run any tests, just generate list")
485    parser.add_argument('-D', '--devtype', metavar='TYPE',
486                        help="Test only device types that implement TYPE")
487    parser.add_argument('-Q', '--quick', action='store_true', default=True,
488                        help="Quick mode: skip test cases that are expected to fail")
489    parser.add_argument('-F', '--full', action='store_false', dest='quick',
490                        help="Full mode: test cases that are expected to fail")
491    parser.add_argument('--strict', action='store_true', dest='strict',
492                        help="Treat all warnings as fatal")
493    parser.add_argument('qemu', nargs='*', metavar='QEMU',
494                        help='QEMU binary to run')
495    args = parser.parse_args()
496
497    if args.debug:
498        lvl = logging.DEBUG
499    elif args.verbose:
500        lvl = logging.INFO
501    else:
502        lvl = logging.WARN
503    logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s')
504
505    fatal_failures = []
506    wl_stats = {}
507    skipped = 0
508    total = 0
509
510    tc = {}
511    dbg("testcases: %r", args.testcases)
512    if args.testcases:
513        for t in chain(*args.testcases):
514            for kv in t.split():
515                k, v = kv.split('=', 1)
516                tc[k] = v
517
518    if len(binariesToTest(args, tc)) == 0:
519        print("No QEMU binary found", file=sys.stderr)
520        parser.print_usage(sys.stderr)
521        return 1
522
523    for t in casesToTest(args, tc):
524        logger.info("running test case: %s", formatTestCase(t))
525        total += 1
526
527        expected_match = findExpectedResult(t)
528        if (args.quick and
529                (expected_match or
530                 not getBinaryInfo(args, t['binary']).machineInfo(t['machine'])['runnable'])):
531            dbg("skipped: %s", formatTestCase(t))
532            skipped += 1
533            continue
534
535        if args.dry_run:
536            continue
537
538        try:
539            f = checkOneCase(args, t)
540        except KeyboardInterrupt:
541            break
542
543        if f:
544            i, wl = checkResultWhitelist(f)
545            dbg("testcase: %r, whitelist match: %r", t, wl)
546            wl_stats.setdefault(i, []).append(f)
547            level = wl.get('loglevel', logging.DEBUG)
548            logFailure(f, level)
549            if wl.get('fatal') or (args.strict and level >= logging.WARN):
550                fatal_failures.append(f)
551        else:
552            dbg("success: %s", formatTestCase(t))
553            if expected_match:
554                logger.warn("Didn't fail as expected: %s", formatTestCase(t))
555
556    logger.info("Total: %d test cases", total)
557    if skipped:
558        logger.info("Skipped %d test cases", skipped)
559
560    if args.debug:
561        stats = sorted([(len(wl_stats.get(i, [])), wl) for i, wl in
562                         enumerate(ERROR_WHITELIST)], key=lambda x: x[0])
563        for count, wl in stats:
564            dbg("whitelist entry stats: %d: %r", count, wl)
565
566    if fatal_failures:
567        for f in fatal_failures:
568            t = f['testcase']
569            logger.error("Fatal failure: %s", formatTestCase(t))
570        logger.error("Fatal failures on some machine/device combinations")
571        return 1
572
573if __name__ == '__main__':
574    sys.exit(main())
575