xref: /openbmc/qemu/scripts/device-crash-test (revision 40d6ee94)
1#!/usr/bin/env python
2#
3#  Copyright (c) 2017 Red Hat Inc
4#
5# Author:
6#  Eduardo Habkost <ehabkost@redhat.com>
7#
8# This program is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License along
19# with this program; if not, write to the Free Software Foundation, Inc.,
20# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21
22"""
23Run QEMU with all combinations of -machine and -device types,
24check for crashes and unexpected errors.
25"""
26from __future__ import print_function
27
28import os
29import sys
30import glob
31import logging
32import traceback
33import re
34import random
35import argparse
36from itertools import chain
37
38sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))
39from qemu import QEMUMachine
40
# Logger shared by the whole script; logging output is configured in
# main() through logging.basicConfig().
logger = logging.getLogger('device-crash-test')
# Shorthand for emitting debug-level messages on the logger above.
dbg = logger.debug
43
44
45# Purposes of the following whitelist:
46# * Avoiding verbose log messages when we find known non-fatal
47#   (exitcode=1) errors
48# * Avoiding fatal errors when we find known crashes
49# * Skipping machines/devices that are known not to work out of
50#   the box, when running in --quick mode
51#
# Keeping the whitelist updated is desirable, but not required,
# because unexpected cases where QEMU exits with exitcode=1 will
# just trigger an INFO message.
55
# Valid whitelist entry keys:
# * accel: regexp, full match only
# * machine: regexp, full match only
# * device: regexp, full match only
# * log: regexp, partial match allowed
# * exitcode: if not present, defaults to 1. If None, matches any exitcode
# * warn: if True, matching failures will be logged as warnings
# * expected: if True, QEMU is expected to always fail every time
#   when testing the corresponding test case
# * loglevel: log level of log output when there's a match.
# * fatal: if True, matching failures are reported as fatal failures
#   and make the script exit with a non-zero status
#
# Entries are tried in order and the first matching entry wins, so the
# two catch-all entries must stay at the end of the list.
ERROR_WHITELIST = [
    # Machines that won't work out of the box:
    #             MACHINE                         | ERROR MESSAGE
    {'machine':'niagara', 'expected':True},       # Unable to load a firmware for -M niagara
    {'machine':'boston', 'expected':True},        # Please provide either a -kernel or -bios argument
    {'machine':'leon3_generic', 'expected':True}, # Can't read bios image (null)

    # devices that don't work out of the box because they require extra options to "-device DEV":
    #            DEVICE                                    | ERROR MESSAGE
    {'device':'.*-(i386|x86_64)-cpu', 'expected':True},    # CPU socket-id is not set
    {'device':'icp', 'expected':True},                     # icp_realize: required link 'xics' not found: Property '.xics' not found
    {'device':'ics', 'expected':True},                     # ics_base_realize: required link 'xics' not found: Property '.xics' not found
    # "-device ide-cd" does work on more recent QEMU versions, so it doesn't have expected=True
    {'device':'ide-cd'},                                   # No drive specified
    {'device':'ide-drive', 'expected':True},               # No drive specified
    {'device':'ide-hd', 'expected':True},                  # No drive specified
    {'device':'ipmi-bmc-extern', 'expected':True},         # IPMI external bmc requires chardev attribute
    {'device':'isa-debugcon', 'expected':True},            # Can't create serial device, empty char device
    {'device':'isa-ipmi-bt', 'expected':True},             # IPMI device requires a bmc attribute to be set
    {'device':'isa-ipmi-kcs', 'expected':True},            # IPMI device requires a bmc attribute to be set
    {'device':'isa-parallel', 'expected':True},            # Can't create serial device, empty char device
    {'device':'ivshmem-doorbell', 'expected':True},        # You must specify a 'chardev'
    {'device':'ivshmem-plain', 'expected':True},           # You must specify a 'memdev'
    {'device':'loader', 'expected':True},                  # please include valid arguments
    {'device':'nand', 'expected':True},                    # Unsupported NAND block size 0x1
    {'device':'nvdimm', 'expected':True},                  # 'memdev' property is not set
    {'device':'nvme', 'expected':True},                    # Device initialization failed
    {'device':'pc-dimm', 'expected':True},                 # 'memdev' property is not set
    {'device':'pci-bridge', 'expected':True},              # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device':'pci-bridge-seat', 'expected':True},         # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device':'pxb', 'expected':True},                     # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device':'scsi-block', 'expected':True},              # drive property not set
    {'device':'scsi-disk', 'expected':True},               # drive property not set
    {'device':'scsi-generic', 'expected':True},            # drive property not set
    {'device':'scsi-hd', 'expected':True},                 # drive property not set
    {'device':'spapr-pci-host-bridge', 'expected':True},   # BUID not specified for PHB
    {'device':'spapr-rng', 'expected':True},               # spapr-rng needs an RNG backend!
    {'device':'spapr-vty', 'expected':True},               # chardev property not set
    {'device':'tpm-tis', 'expected':True},                 # tpm_tis: backend driver with id (null) could not be found
    {'device':'unimplemented-device', 'expected':True},    # property 'size' not specified or zero
    {'device':'usb-braille', 'expected':True},             # Property chardev is required
    {'device':'usb-mtp', 'expected':True},                 # rootdir property must be configured
    {'device':'usb-redir', 'expected':True},               # Parameter 'chardev' is missing
    {'device':'usb-serial', 'expected':True},              # Property chardev is required
    {'device':'usb-storage', 'expected':True},             # drive property not set
    {'device':'vfio-amd-xgbe', 'expected':True},           # -device vfio-amd-xgbe: vfio error: wrong host device name
    {'device':'vfio-calxeda-xgmac', 'expected':True},      # -device vfio-calxeda-xgmac: vfio error: wrong host device name
    {'device':'vfio-pci', 'expected':True},                # No provided host device
    {'device':'vfio-pci-igd-lpc-bridge', 'expected':True}, # VFIO dummy ISA/LPC bridge must have address 1f.0
    {'device':'vhost-scsi.*', 'expected':True},            # vhost-scsi: missing wwpn
    {'device':'vhost-vsock-device', 'expected':True},      # guest-cid property must be greater than 2
    {'device':'vhost-vsock-pci', 'expected':True},         # guest-cid property must be greater than 2
    {'device':'virtio-9p-ccw', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
    {'device':'virtio-9p-device', 'expected':True},        # 9pfs device couldn't find fsdev with the id = NULL
    {'device':'virtio-9p-pci', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
    {'device':'virtio-blk-ccw', 'expected':True},          # drive property not set
    {'device':'virtio-blk-device', 'expected':True},       # drive property not set
    {'device':'virtio-blk-pci', 'expected':True},          # drive property not set
    {'device':'virtio-crypto-ccw', 'expected':True},       # 'cryptodev' parameter expects a valid object
    {'device':'virtio-crypto-device', 'expected':True},    # 'cryptodev' parameter expects a valid object
    {'device':'virtio-crypto-pci', 'expected':True},       # 'cryptodev' parameter expects a valid object
    {'device':'virtio-input-host-device', 'expected':True}, # evdev property is required
    {'device':'virtio-input-host-pci', 'expected':True},   # evdev property is required
    {'device':'xen-pvdevice', 'expected':True},            # Device ID invalid, it must always be supplied
    {'device':'vhost-vsock-ccw', 'expected':True},         # guest-cid property must be greater than 2
    {'device':'zpci', 'expected':True},                    # target must be defined
    {'device':'pnv-(occ|icp|lpc)', 'expected':True},       # required link 'xics' not found: Property '.xics' not found
    {'device':'powernv-cpu-.*', 'expected':True},          # pnv_core_realize: required link 'xics' not found: Property '.xics' not found

    # ioapic devices are already created by pc and will fail:
    {'machine':'q35|pc.*', 'device':'kvm-ioapic', 'expected':True}, # Only 1 ioapics allowed
    {'machine':'q35|pc.*', 'device':'ioapic', 'expected':True},     # Only 1 ioapics allowed

    # "spapr-cpu-core needs a pseries machine"
    {'machine':'(?!pseries).*', 'device':'.*-spapr-cpu-core', 'expected':True},

    # KVM-specific devices shouldn't be tried without accel=kvm:
    {'accel':'(?!kvm).*', 'device':'kvmclock', 'expected':True},

    # xen-specific machines and devices:
    {'accel':'(?!xen).*', 'machine':'xen.*', 'expected':True},
    {'accel':'(?!xen).*', 'device':'xen-.*', 'expected':True},

    # this fails on some machine-types, but not all, so they don't have expected=True:
    {'device':'vmgenid'}, # vmgenid requires DMA write support in fw_cfg, which this machine type does not provide

    # Silence INFO messages for errors that are common on multiple
    # devices/machines:
    {'log':r"No '[\w-]+' bus found for device '[\w-]+'"},
    {'log':r"images* must be given with the 'pflash' parameter"},
    {'log':r"(Guest|ROM|Flash|Kernel) image must be specified"},
    {'log':r"[cC]ould not load [\w ]+ (BIOS|bios) '[\w-]+\.bin'"},
    {'log':r"Couldn't find rom image '[\w-]+\.bin'"},
    {'log':r"speed mismatch trying to attach usb device"},
    {'log':r"Can't create a second ISA bus"},
    {'log':r"duplicate fw_cfg file name"},
    # sysbus-related error messages: most machines reject most dynamic sysbus devices:
    {'log':r"Option '-device [\w.,-]+' cannot be handled by this machine"},
    {'log':r"Device [\w.,-]+ is not supported by this machine yet"},
    {'log':r"Device [\w.,-]+ can not be dynamically instantiated"},
    {'log':r"Platform Bus: Can not fit MMIO region of size "},
    # other more specific errors we will ignore:
    {'device':'.*-spapr-cpu-core', 'log':r"CPU core type should be"},
    {'log':r"MSI(-X)? is not supported by interrupt controller"},
    {'log':r"pxb-pcie? devices cannot reside on a PCIe? bus"},
    {'log':r"Ignoring smp_cpus value"},
    {'log':r"sd_init failed: Drive 'sd0' is already in use because it has been automatically connected to another device"},
    {'log':r"This CPU requires a smaller page size than the system is using"},
    {'log':r"MSI-X support is mandatory in the S390 architecture"},
    {'log':r"rom check and register reset failed"},
    {'log':r"Unable to initialize GIC, CPUState for CPU#0 not valid"},
    {'log':r"Multiple VT220 operator consoles are not supported"},
    {'log':r"core 0 already populated"},
    {'log':r"could not find stage1 bootloader"},

    # other exitcode=1 failures not listed above will just generate INFO messages:
    {'exitcode':1, 'loglevel':logging.INFO},

    # everything else (including SIGABRT and SIGSEGV) will be a fatal error:
    {'exitcode':None, 'fatal':True, 'loglevel':logging.FATAL},
]
188
189
def _whitelistKeyMatch(wl, t, key):
    """Check one whitelist regexp key against the same key of a test case

    A key that is missing from either the whitelist entry or the test
    case never excludes a match.
    """
    if key not in wl or key not in t:
        return True
    # Wrap the pattern in a group so the end anchor applies to the whole
    # expression, not just to the last alternative of e.g. 'q35|pc.*'.
    return re.match(r'(?:%s)\Z' % (wl[key],), t[key]) is not None


def whitelistTestCaseMatch(wl, t):
    """Check if a test case specification can match a whitelist entry

    This only checks if a whitelist entry is a candidate match
    for a given test case, it won't check if the test case
    results/output match the entry.  See whitelistResultMatch().

    The 'machine', 'accel' and 'device' regexps of the entry must each
    match the corresponding test case value in full.  Returns a bool.
    """
    return all(_whitelistKeyMatch(wl, t, key)
               for key in ('machine', 'accel', 'device'))
206
207
def whitelistCandidates(t):
    """Yield (index, entry) for each ERROR_WHITELIST entry that may apply to t

    Entries are produced in list order; index is the position of the
    entry inside ERROR_WHITELIST.
    """
    for index, entry in enumerate(ERROR_WHITELIST):
        if not whitelistTestCaseMatch(entry, t):
            continue
        yield (index, entry)
213
214
def findExpectedResult(t):
    """Find the first candidate whitelist entry flagged expected=True

    Returns an (i, wl) tuple, where i is the index in
    ERROR_WHITELIST and wl is the whitelist entry itself, or None
    when no candidate entry for the test case has a true 'expected' value.
    """
    flagged = ((i, wl) for i, wl in whitelistCandidates(t)
               if wl.get('expected'))
    return next(flagged, None)
223            return (i, wl)
224
225
def whitelistResultMatch(wl, r):
    """Tell whether the results/output of a test case satisfy a whitelist entry

    Must only be called for entries for which whitelistTestCaseMatch()
    is true (e.g. entries returned by whitelistCandidates()).

    The exit code must equal the entry's 'exitcode' (default 1; None
    accepts any exit code) and, if the entry has a 'log' regexp, it
    must be found somewhere in the log.  The result is truthy on a match.
    """
    assert whitelistTestCaseMatch(wl, r['testcase'])

    wanted_exitcode = wl.get('exitcode', 1)
    if wanted_exitcode is not None and r['exitcode'] != wanted_exitcode:
        return False

    if 'log' not in wl:
        return True
    return re.search(wl['log'], r['log'], re.MULTILINE)
237             re.search(wl['log'], r['log'], re.MULTILINE)))
238
239
def checkResultWhitelist(r):
    """Return the first whitelist entry matching a test case result

    The return value is an (i, wl) tuple, where i is the index in
    ERROR_WHITELIST and wl is the whitelist entry itself.  An
    Exception is raised if no entry matches at all.
    """
    testcase = r['testcase']
    for candidate in whitelistCandidates(testcase):
        index, entry = candidate
        if whitelistResultMatch(entry, r):
            return index, entry

    raise Exception("this should never happen")
250    raise Exception("this should never happen")
251
252
def qemuOptsEscape(s):
    """Return s with every comma doubled, for use as a QemuOpts option value"""
    pieces = s.split(",")
    return ",,".join(pieces)
255    return s.replace(",", ",,")
256
257
def formatTestCase(t):
    """Render a test case dict as space-separated "key=value" pairs for logging"""
    pairs = []
    for key in t:
        pairs.append('%s=%s' % (key, t[key]))
    return ' '.join(pairs)
260    return ' '.join('%s=%s' % (k, v) for k, v in t.items())
261
262
def qomListTypeNames(vm, **kwargs):
    """Issue the qom-list-types QMP command on vm and return the type names

    Keyword arguments are passed through as arguments of the command.
    """
    names = []
    for entry in vm.command('qom-list-types', **kwargs):
        names.append(entry['name'])
    return names
266    return [t['name'] for t in types]
267
268
def infoQDM(vm):
    """Run 'info qdm' through the human monitor and yield one dict per device

    Each dict has a 'name' key (the quoted device name) and a boolean
    'no-user' key.  Blank lines and lines ending in ':' are skipped.
    """
    output = vm.command('human-monitor-command', **{'command-line': 'info qdm'})
    for raw in output.split('\n'):
        entry = raw.strip()
        if not entry or entry.endswith(':'):
            continue
        name = re.search(r'name "([^"]+)"', entry).group(1)
        yield {'name': name, 'no-user': ', no-user' in entry}
279        yield d
280
281
class QemuBinaryInfo(object):
    """Device and machine information queried from one QEMU binary

    Attributes set by the constructor:
    * binary: the QEMU binary the information was queried from
    * alldevs: names of all non-abstract QOM types implementing devtype
    * no_user_devs: names of devices flagged 'no-user' by 'info qdm'
    * user_devs: alldevs minus no_user_devs
    * machines: machine names returned by query-machines
    * kvm_available: the 'enabled' field returned by query-kvm
    """

    def __init__(self, binary, devtype):
        """Launch binary with '-machine none' and query it through QMP

        devtype is the QOM type the listed devices must implement;
        None selects 'device'.
        """
        if devtype is None:
            devtype = 'device'

        self.binary = binary
        # cache of machineInfo() results, indexed by machine name
        self._machine_info = {}

        dbg("devtype: %r", devtype)
        args = ['-S', '-machine', 'none,accel=kvm:tcg']
        dbg("querying info for QEMU binary: %s", binary)
        vm = QEMUMachine(binary=binary, args=args)
        vm.launch()
        try:
            self.alldevs = set(qomListTypeNames(vm, implements=devtype, abstract=False))
            # there's no way to query DeviceClass::user_creatable using QMP,
            # so use 'info qdm':
            self.no_user_devs = set(d['name'] for d in infoQDM(vm) if d['no-user'])
            self.machines = [m['name'] for m in vm.command('query-machines')]
            self.user_devs = self.alldevs.difference(self.no_user_devs)
            self.kvm_available = vm.command('query-kvm')['enabled']
        finally:
            vm.shutdown()

    def machineInfo(self, machine):
        """Query for information on a specific machine-type

        Returns a dict with a single 'runnable' key, which is True if
        QEMU could be launched with "-machine <machine>" and False if
        launching raised an exception.

        Results are cached internally, in case the same machine-
        type is queried multiple times.
        """
        if machine in self._machine_info:
            return self._machine_info[machine]

        mi = {}
        args = ['-S', '-machine', '%s' % (machine,)]
        dbg("querying machine info for binary=%s machine=%s", self.binary, machine)
        vm = QEMUMachine(binary=self.binary, args=args)
        try:
            vm.launch()
            mi['runnable'] = True
        except Exception:
            # KeyboardInterrupt and SystemExit are not Exception
            # subclasses, so they still propagate to the caller.
            dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=True)
            dbg("log: %r", vm.get_log())
            mi['runnable'] = False
        finally:
            # Always shut the VM down, even when interrupted, so that no
            # QEMU process is left behind.
            vm.shutdown()

        self._machine_info[machine] = mi
        return mi
331        return mi
332
333
# Cache of QemuBinaryInfo objects, indexed by QEMU binary path
BINARY_INFO = {}


def getBinaryInfo(args, binary):
    """Return the QemuBinaryInfo for binary, creating and caching it on first use

    args.devtype is passed to QemuBinaryInfo when a new object is created.
    """
    if binary in BINARY_INFO:
        return BINARY_INFO[binary]

    info = QemuBinaryInfo(binary, args.devtype)
    BINARY_INFO[binary] = info
    return info
340    return BINARY_INFO[binary]
341
342
def checkOneCase(args, testcase):
    """Check one specific case

    Launches the test case's binary with "-S", the requested machine
    and accelerator and a single "-device" option, then shuts it
    down again.

    args is the parsed command line (currently unused here);
    testcase is a dict with 'binary', 'accel', 'machine' and 'device'
    keys.

    Returns a dictionary containing failure information on error
    (keys: 'exc_traceback', 'exitcode', 'log', 'testcase', 'cmdline'),
    or None on success.  KeyboardInterrupt is propagated to the caller
    after the VM has been shut down.
    """
    binary = testcase['binary']
    accel = testcase['accel']
    machine = testcase['machine']
    device = testcase['device']

    dbg("will test: %r", testcase)

    # Use a separate name for the QEMU argument list, so it doesn't
    # shadow the 'args' parameter.
    qemu_args = ['-S', '-machine', '%s,accel=%s' % (machine, accel),
                 '-device', qemuOptsEscape(device)]
    cmdline = ' '.join([binary] + qemu_args)
    dbg("will launch QEMU: %s", cmdline)
    vm = QEMUMachine(binary=binary, args=qemu_args)

    exc_traceback = None
    try:
        vm.launch()
    except Exception:
        # Only ordinary errors are recorded as failures;
        # KeyboardInterrupt and SystemExit are not Exception
        # subclasses and propagate after the cleanup below.
        exc_traceback = traceback.format_exc()
        dbg("Exception while running test case")
    finally:
        vm.shutdown()
        ec = vm.exitcode()
        log = vm.get_log()

    if exc_traceback is None and ec == 0:
        return None

    return {'exc_traceback':exc_traceback,
            'exitcode':ec,
            'log':log,
            'testcase':testcase,
            'cmdline':cmdline}
381
382
def binariesToTest(args, testcase):
    """Return the QEMU binaries to test

    The binaries given on the command line (args.qemu) are used when
    there are any; otherwise ./*-softmmu/qemu-system-* is globbed.
    """
    return args.qemu or glob.glob('./*-softmmu/qemu-system-*')
388    return r
389
390
def accelsToTest(args, testcase):
    """Yield the accelerators to test: 'kvm' if the binary reports it as available, then 'tcg'"""
    info = getBinaryInfo(args, testcase['binary'])
    accels = ['kvm', 'tcg'] if info.kvm_available else ['tcg']
    for accel in accels:
        yield accel
394    yield 'tcg'
395
396
def machinesToTest(args, testcase):
    """Return the machine names known for the test case's binary"""
    info = getBinaryInfo(args, testcase['binary'])
    return info.machines
399
400
def devicesToTest(args, testcase):
    """Return the user-creatable device names known for the test case's binary"""
    info = getBinaryInfo(args, testcase['binary'])
    return info.user_devs
403
404
# Ordered list of (variable name, generator function) pairs used by
# genCases() to expand a partial test case.  Order matters: each
# function is called with a test case where the variables listed
# before it are already set (e.g. accelsToTest reads testcase['binary']).
TESTCASE_VARIABLES = [
    ('binary', binariesToTest),
    ('accel', accelsToTest),
    ('machine', machinesToTest),
    ('device', devicesToTest),
]
411
412
def genCases1(args, testcases, var, fn):
    """Expand a sequence of test cases along one variable

    For every test case that doesn't set var yet, one copy is yielded
    per value returned by fn(args, testcase), with var set to that
    value.  A test case that already sets var is yielded as a plain
    copy, which is what lets the "-t" command-line option pin a
    specific value.
    """
    for base in testcases:
        if var not in base:
            for value in fn(args, base):
                derived = base.copy()
                derived[var] = value
                yield derived
            continue
        yield base.copy()
428                yield t
429
430
def genCases(args, testcase):
    """Expand testcase along every variable listed in TESTCASE_VARIABLES

    The result is an iterable of test case dicts, built by chaining
    one genCases1() step per variable, in list order.
    """
    expanded = [testcase.copy()]
    for name, generator in TESTCASE_VARIABLES:
        dbg("var: %r, fn: %r", name, generator)
        expanded = genCases1(args, expanded, name, generator)
    return expanded
438    return cases
439
440
def casesToTest(args, testcase):
    """Return the test cases to run, honouring --random, --debug and --shuffle

    * args.random: keep only a random sample of at most that many cases
    * args.debug: log how many cases there are
    * args.shuffle: randomize the order of the cases
    """
    selected = genCases(args, testcase)
    if args.random:
        pool = list(selected)
        count = min(args.random, len(pool))
        selected = random.sample(pool, count)
    if args.debug:
        selected = list(selected)
        dbg("%d test cases to test", len(selected))
    if args.shuffle:
        selected = list(selected)
        random.shuffle(selected)
    return selected
452    return cases
453
454
def logFailure(f, level):
    """Log all details of failure dict f at the given log level

    The test case, command line, each line of the QEMU log, the exit
    code and (if present) the exception traceback are logged.
    """
    def emit(fmt, *fmtargs):
        logger.log(level, fmt, *fmtargs)

    emit("failed: %s", formatTestCase(f['testcase']))
    emit("cmdline: %s", f['cmdline'])
    log_lines = f['log'].strip().split('\n')
    for line in log_lines:
        emit("log: %s", line)
    emit("exit code: %r", f['exitcode'])
    if not f['exc_traceback']:
        return
    emit("exception:")
    for line in f['exc_traceback'].split('\n'):
        emit("  %s", line.rstrip('\n'))
465            logger.log(level, "  %s", l.rstrip('\n'))
466
467
def main():
    """Command-line entry point

    Parses the command line, generates the test cases, runs each of
    them and classifies every failure using ERROR_WHITELIST.

    Returns the process exit status: 1 if no QEMU binary was found or
    if there were fatal failures, 0 otherwise.
    """
    parser = argparse.ArgumentParser(description="QEMU -device crash test")
    parser.add_argument('-t', metavar='KEY=VALUE', nargs='*',
                        help="Limit test cases to KEY=VALUE",
                        action='append', dest='testcases', default=[])
    parser.add_argument('-d', '--debug', action='store_true',
                        help='debug output')
    parser.add_argument('-v', '--verbose', action='store_true', default=True,
                        help='verbose output')
    parser.add_argument('-q', '--quiet', dest='verbose', action='store_false',
                        help='non-verbose output')
    parser.add_argument('-r', '--random', type=int, metavar='COUNT',
                        help='run a random sample of COUNT test cases',
                        default=0)
    parser.add_argument('--shuffle', action='store_true',
                        help='Run test cases in random order')
    parser.add_argument('--dry-run', action='store_true',
                        help="Don't run any tests, just generate list")
    parser.add_argument('-D', '--devtype', metavar='TYPE',
                        help="Test only device types that implement TYPE")
    parser.add_argument('-Q', '--quick', action='store_true', default=True,
                        help="Quick mode: skip test cases that are expected to fail")
    parser.add_argument('-F', '--full', action='store_false', dest='quick',
                        help="Full mode: test cases that are expected to fail")
    parser.add_argument('--strict', action='store_true', dest='strict',
                        help="Treat all warnings as fatal")
    parser.add_argument('qemu', nargs='*', metavar='QEMU',
                        help='QEMU binary to run')
    args = parser.parse_args()

    if args.debug:
        lvl = logging.DEBUG
    elif args.verbose:
        lvl = logging.INFO
    else:
        lvl = logging.WARNING
    logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s')

    fatal_failures = []
    # failures grouped by the index of the whitelist entry they matched
    wl_stats = {}
    skipped = 0
    total = 0

    # Partial test case built from the "-t KEY=VALUE" options; variables
    # set here are not expanded by the test case generator.
    tc = {}
    dbg("testcases: %r", args.testcases)
    if args.testcases:
        for t in chain(*args.testcases):
            for kv in t.split():
                if '=' not in kv:
                    # report a usage error instead of an unhandled ValueError
                    parser.error("invalid test case specification %r: "
                                 "expected KEY=VALUE" % (kv,))
                k, v = kv.split('=', 1)
                tc[k] = v

    if not binariesToTest(args, tc):
        print("No QEMU binary found", file=sys.stderr)
        parser.print_usage(sys.stderr)
        return 1

    for t in casesToTest(args, tc):
        logger.info("running test case: %s", formatTestCase(t))
        total += 1

        expected_match = findExpectedResult(t)
        if (args.quick and
                (expected_match or
                 not getBinaryInfo(args, t['binary']).machineInfo(t['machine'])['runnable'])):
            dbg("skipped: %s", formatTestCase(t))
            skipped += 1
            continue

        if args.dry_run:
            continue

        try:
            f = checkOneCase(args, t)
        except KeyboardInterrupt:
            # stop testing, but still print the summary below
            break

        if f:
            i, wl = checkResultWhitelist(f)
            dbg("testcase: %r, whitelist match: %r", t, wl)
            wl_stats.setdefault(i, []).append(f)
            level = wl.get('loglevel', logging.DEBUG)
            logFailure(f, level)
            if wl.get('fatal') or (args.strict and level >= logging.WARNING):
                fatal_failures.append(f)
        else:
            dbg("success: %s", formatTestCase(t))
            if expected_match:
                # Logger.warn() is a deprecated alias that newer Python
                # versions no longer provide; use warning() instead.
                logger.warning("Didn't fail as expected: %s", formatTestCase(t))

    logger.info("Total: %d test cases", total)
    if skipped:
        logger.info("Skipped %d test cases", skipped)

    if args.debug:
        stats = sorted([(len(wl_stats.get(i, [])), wl) for i, wl in
                         enumerate(ERROR_WHITELIST)], key=lambda x: x[0])
        for count, wl in stats:
            dbg("whitelist entry stats: %d: %r", count, wl)

    if fatal_failures:
        for f in fatal_failures:
            t = f['testcase']
            logger.error("Fatal failure: %s", formatTestCase(t))
        logger.error("Fatal failures on some machine/device combinations")
        return 1

    return 0
572        return 1
573
# Run main() only when executed as a script, and use its return value
# as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
576