#!/usr/bin/env python3
#
#  Copyright (c) 2017 Red Hat Inc
#
# Author:
#  Eduardo Habkost <ehabkost@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

"""
Run QEMU with all combinations of -machine and -device types,
check for crashes and unexpected errors.
"""

import os
import sys
import glob
import logging
import traceback
import re
import random
import argparse
from itertools import chain

sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))
from qemu.machine import QEMUMachine

logger = logging.getLogger('device-crash-test')
dbg = logger.debug


# Purposes of the following rule list:
# * Avoiding verbose log messages when we find known non-fatal
#   (exitcode=1) errors
# * Avoiding fatal errors when we find known crashes
# * Skipping machines/devices that are known not to work out of
#   the box, when running in --quick mode
#
# Keeping the rule list updated is desirable, but not required,
# because unexpected cases where QEMU exits with exitcode=1 will
# just trigger an INFO message.

# Valid error rule keys:
# * accel: regexp, full match only
# * machine: regexp, full match only
# * device: regexp, full match only
# * log: regexp, partial match allowed
# * exitcode: if not present, defaults to 1. If None, matches any exitcode
# * warn: if True, matching failures will be logged as warnings
# * expected: if True, QEMU is expected to always fail every time
#   when testing the corresponding test case
# * loglevel: log level of log output when there's a match.
# * fatal: if True, matching failures are reported as fatal failures
#
# Rules are tried in list order and the first matching rule wins, so
# the two catch-all rules at the end must stay last.
ERROR_RULE_LIST = [
    # Machines that won't work out of the box:
    # MACHINE | ERROR MESSAGE
    {'machine': 'niagara', 'expected': True},  # Unable to load a firmware for -M niagara
    {'machine': 'boston', 'expected': True},  # Please provide either a -kernel or -bios argument
    {'machine': 'leon3_generic', 'expected': True},  # Can't read bios image (null)

    # devices that don't work out of the box because they require extra options to "-device DEV":
    # DEVICE | ERROR MESSAGE
    {'device': '.*-(i386|x86_64)-cpu', 'expected': True},  # CPU socket-id is not set
    {'device': 'icp', 'expected': True},  # icp_realize: required link 'xics' not found: Property '.xics' not found
    {'device': 'ics', 'expected': True},  # ics_base_realize: required link 'xics' not found: Property '.xics' not found
    # "-device ide-cd" does work on more recent QEMU versions, so it doesn't have expected=True
    {'device': 'ide-cd'},  # No drive specified
    {'device': 'ide-hd', 'expected': True},  # No drive specified
    {'device': 'ipmi-bmc-extern', 'expected': True},  # IPMI external bmc requires chardev attribute
    {'device': 'isa-debugcon', 'expected': True},  # Can't create serial device, empty char device
    {'device': 'isa-ipmi-bt', 'expected': True},  # IPMI device requires a bmc attribute to be set
    {'device': 'isa-ipmi-kcs', 'expected': True},  # IPMI device requires a bmc attribute to be set
    {'device': 'isa-parallel', 'expected': True},  # Can't create serial device, empty char device
    {'device': 'ivshmem-doorbell', 'expected': True},  # You must specify a 'chardev'
    {'device': 'ivshmem-plain', 'expected': True},  # You must specify a 'memdev'
    {'device': 'loader', 'expected': True},  # please include valid arguments
    {'device': 'nand', 'expected': True},  # Unsupported NAND block size 0x1
    {'device': 'nvdimm', 'expected': True},  # 'memdev' property is not set
    {'device': 'nvme', 'expected': True},  # Device initialization failed
    {'device': 'pc-dimm', 'expected': True},  # 'memdev' property is not set
    {'device': 'pci-bridge', 'expected': True},  # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device': 'pci-bridge-seat', 'expected': True},  # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device': 'pxb', 'expected': True},  # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device': 'scsi-block', 'expected': True},  # drive property not set
    {'device': 'scsi-generic', 'expected': True},  # drive property not set
    {'device': 'scsi-hd', 'expected': True},  # drive property not set
    {'device': 'spapr-pci-host-bridge', 'expected': True},  # BUID not specified for PHB
    {'device': 'spapr-rng', 'expected': True},  # spapr-rng needs an RNG backend!
    {'device': 'spapr-vty', 'expected': True},  # chardev property not set
    {'device': 'tpm-tis', 'expected': True},  # tpm_tis: backend driver with id (null) could not be found
    {'device': 'unimplemented-device', 'expected': True},  # property 'size' not specified or zero
    {'device': 'usb-braille', 'expected': True},  # Property chardev is required
    {'device': 'usb-mtp', 'expected': True},  # rootdir property must be configured
    {'device': 'usb-redir', 'expected': True},  # Parameter 'chardev' is missing
    {'device': 'usb-serial', 'expected': True},  # Property chardev is required
    {'device': 'usb-storage', 'expected': True},  # drive property not set
    {'device': 'vfio-amd-xgbe', 'expected': True},  # -device vfio-amd-xgbe: vfio error: wrong host device name
    {'device': 'vfio-calxeda-xgmac', 'expected': True},  # -device vfio-calxeda-xgmac: vfio error: wrong host device name
    {'device': 'vfio-pci', 'expected': True},  # No provided host device
    {'device': 'vfio-pci-igd-lpc-bridge', 'expected': True},  # VFIO dummy ISA/LPC bridge must have address 1f.0
    {'device': 'vhost-scsi.*', 'expected': True},  # vhost-scsi: missing wwpn
    {'device': 'vhost-vsock-device', 'expected': True},  # guest-cid property must be greater than 2
    {'device': 'vhost-vsock-pci', 'expected': True},  # guest-cid property must be greater than 2
    {'device': 'virtio-9p-ccw', 'expected': True},  # 9pfs device couldn't find fsdev with the id = NULL
    {'device': 'virtio-9p-device', 'expected': True},  # 9pfs device couldn't find fsdev with the id = NULL
    {'device': 'virtio-9p-pci', 'expected': True},  # 9pfs device couldn't find fsdev with the id = NULL
    {'device': 'virtio-blk-ccw', 'expected': True},  # drive property not set
    {'device': 'virtio-blk-device', 'expected': True},  # drive property not set
    {'device': 'virtio-blk-pci', 'expected': True},  # drive property not set
    {'device': 'virtio-crypto-ccw', 'expected': True},  # 'cryptodev' parameter expects a valid object
    {'device': 'virtio-crypto-device', 'expected': True},  # 'cryptodev' parameter expects a valid object
    {'device': 'virtio-crypto-pci', 'expected': True},  # 'cryptodev' parameter expects a valid object
    {'device': 'virtio-input-host-device', 'expected': True},  # evdev property is required
    {'device': 'virtio-input-host-pci', 'expected': True},  # evdev property is required
    {'device': 'xen-pvdevice', 'expected': True},  # Device ID invalid, it must always be supplied
    {'device': 'vhost-vsock-ccw', 'expected': True},  # guest-cid property must be greater than 2
    {'device': 'zpci', 'expected': True},  # target must be defined
    {'device': 'pnv-(occ|icp|lpc)', 'expected': True},  # required link 'xics' not found: Property '.xics' not found
    {'device': 'powernv-cpu-.*', 'expected': True},  # pnv_core_realize: required link 'xics' not found: Property '.xics' not found

    # ioapic devices are already created by pc and will fail:
    {'machine': 'q35|pc.*', 'device': 'kvm-ioapic', 'expected': True},  # Only 1 ioapics allowed
    {'machine': 'q35|pc.*', 'device': 'ioapic', 'expected': True},  # Only 1 ioapics allowed

    # "spapr-cpu-core needs a pseries machine"
    {'machine': '(?!pseries).*', 'device': '.*-spapr-cpu-core', 'expected': True},

    # KVM-specific devices shouldn't be tried without accel=kvm:
    {'accel': '(?!kvm).*', 'device': 'kvmclock', 'expected': True},

    # xen-specific machines and devices:
    {'accel': '(?!xen).*', 'machine': 'xen.*', 'expected': True},
    {'accel': '(?!xen).*', 'device': 'xen-.*', 'expected': True},

    # this fails on some machine-types, but not all, so they don't have expected=True:
    {'device': 'vmgenid'},  # vmgenid requires DMA write support in fw_cfg, which this machine type does not provide

    # Silence INFO messages for errors that are common on multiple
    # devices/machines:
    {'log': r"No '[\w-]+' bus found for device '[\w-]+'"},
    {'log': r"images* must be given with the 'pflash' parameter"},
    {'log': r"(Guest|ROM|Flash|Kernel) image must be specified"},
    {'log': r"[cC]ould not load [\w ]+ (BIOS|bios) '[\w-]+\.bin'"},
    {'log': r"Couldn't find rom image '[\w-]+\.bin'"},
    {'log': r"speed mismatch trying to attach usb device"},
    {'log': r"Can't create a second ISA bus"},
    {'log': r"duplicate fw_cfg file name"},
    # sysbus-related error messages: most machines reject most dynamic sysbus devices:
    {'log': r"Option '-device [\w.,-]+' cannot be handled by this machine"},
    {'log': r"Device [\w.,-]+ is not supported by this machine yet"},
    {'log': r"Device [\w.,-]+ can not be dynamically instantiated"},
    {'log': r"Platform Bus: Can not fit MMIO region of size "},
    # other more specific errors we will ignore:
    {'device': '.*-spapr-cpu-core', 'log': r"CPU core type should be"},
    {'log': r"MSI(-X)? is not supported by interrupt controller"},
    {'log': r"pxb-pcie? devices cannot reside on a PCIe? bus"},
    {'log': r"Ignoring smp_cpus value"},
    {'log': r"sd_init failed: Drive 'sd0' is already in use because it has been automatically connected to another device"},
    {'log': r"This CPU requires a smaller page size than the system is using"},
    {'log': r"MSI-X support is mandatory in the S390 architecture"},
    {'log': r"rom check and register reset failed"},
    {'log': r"Unable to initialize GIC, CPUState for CPU#0 not valid"},
    {'log': r"Multiple VT220 operator consoles are not supported"},
    {'log': r"core 0 already populated"},
    {'log': r"could not find stage1 bootloader"},

    # other exitcode=1 failures not listed above will just generate INFO messages:
    {'exitcode': 1, 'loglevel': logging.INFO},

    # everything else (including SIGABRT and SIGSEGV) will be a fatal error:
    {'exitcode': None, 'fatal': True, 'loglevel': logging.FATAL},
]


def _ruleFieldMatch(rule, t, key):
    """Check one regexp field ('machine', 'accel' or 'device') of a rule

    A field that is absent from either the rule or the test case
    does not restrict the match.  Otherwise the rule regexp must
    match the *whole* test case value.
    """
    if key not in rule or key not in t:
        return True
    # re.fullmatch() is used instead of re.match(pattern + '$'):
    # appending '$' only anchors the last branch of an alternation
    # such as 'q35|pc.*', so the first branch would match as a prefix.
    return re.fullmatch(rule[key], t[key]) is not None


def errorRuleTestCaseMatch(rule, t):
    """Check if a test case specification can match an error rule

    This only checks if an error rule is a candidate match
    for a given test case, it won't check if the test case
    results/output match the rule.  See ruleListResultMatch().

    Returns True if every one of 'machine', 'accel' and 'device'
    either is missing from the rule or the test case, or fully
    matches the rule regexp.
    """
    return all(_ruleFieldMatch(rule, t, key)
               for key in ('machine', 'accel', 'device'))


def ruleListCandidates(t):
    """Generate the list of candidates that can match a test case

    Yields (i, rule) tuples in ERROR_RULE_LIST order.
    """
    for i, rule in enumerate(ERROR_RULE_LIST):
        if errorRuleTestCaseMatch(rule, t):
            yield (i, rule)


def findExpectedResult(t):
    """Check if there's an expected=True error rule for a test case

    Returns (i, rule) tuple, where i is the index in
    ERROR_RULE_LIST and rule is the error rule itself.
    Returns None if no candidate rule has expected=True.
    """
    for i, rule in ruleListCandidates(t):
        if rule.get('expected'):
            return (i, rule)
    return None


def ruleListResultMatch(rule, r):
    """Check if test case results/output match an error rule

    It is valid to call this function only if
    errorRuleTestCaseMatch() is True for the rule (e.g. on
    rules returned by ruleListCandidates())

    r is a failure dictionary as returned by checkOneCase().
    """
    assert errorRuleTestCaseMatch(rule, r['testcase'])
    # A rule without 'exitcode' only matches exitcode 1; an explicit
    # None matches any exit code.
    exitcode = rule.get('exitcode', 1)
    if exitcode is not None and r['exitcode'] != exitcode:
        return False
    if 'log' not in rule:
        return True
    return re.search(rule['log'], r['log'], re.MULTILINE) is not None


def checkResultRuleList(r):
    """Look up error rule for a given test case result

    Returns (i, rule) tuple, where i is the index in
    ERROR_RULE_LIST and rule is the error rule itself.

    Raises RuntimeError if no rule matches.  That should not happen
    as long as ERROR_RULE_LIST ends with the catch-all
    exitcode=None rule.
    """
    for i, rule in ruleListCandidates(r['testcase']):
        if ruleListResultMatch(rule, r):
            return i, rule

    raise RuntimeError("this should never happen")


def qemuOptsEscape(s):
    """Escape option value QemuOpts (every ',' is doubled)"""
    return s.replace(",", ",,")


def formatTestCase(t):
    """Format test case info as "key=value key=value" for prettier logging output"""
    return ' '.join('%s=%s' % (k, v) for k, v in t.items())


def qomListTypeNames(vm, **kwargs):
    """Run qom-list-types QMP command, return type names"""
    types = vm.command('qom-list-types', **kwargs)
    return [t['name'] for t in types]


def infoQDM(vm):
    """Parse 'info qdm' output

    Yields one dictionary per device line, with keys 'name' and
    'no-user'.  Empty lines and lines ending with ':' are skipped.
    """
    args = {'command-line': 'info qdm'}
    devhelp = vm.command('human-monitor-command', **args)
    for line in devhelp.split('\n'):
        line = line.strip()
        if line == '' or line.endswith(':'):
            continue
        yield {'name': re.search(r'name "([^"]+)"', line).group(1),
               'no-user': (re.search(', no-user', line) is not None)}


class QemuBinaryInfo(object):
    """Information queried from one QEMU binary

    Attributes set by the constructor:
    * binary: the binary path passed in
    * alldevs: set of non-abstract type names implementing devtype
    * no_user_devs: set of device names flagged no-user by 'info qdm'
    * user_devs: alldevs minus no_user_devs
    * machines: list of machine-type names
    * kvm_available: 'enabled' field of the query-kvm reply
    """

    def __init__(self, binary, devtype):
        """Launch binary with '-machine none' and query it

        devtype is the type name that tested devices must implement;
        None means 'device'.
        """
        if devtype is None:
            devtype = 'device'

        self.binary = binary
        # cache for machineInfo(), keyed by machine-type name
        self._machine_info = {}

        dbg("devtype: %r", devtype)
        args = ['-S', '-machine', 'none,accel=kvm:tcg']
        dbg("querying info for QEMU binary: %s", binary)
        vm = QEMUMachine(binary=binary, args=args)
        vm.launch()
        try:
            self.alldevs = set(qomListTypeNames(vm, implements=devtype, abstract=False))
            # there's no way to query DeviceClass::user_creatable using QMP,
            # so use 'info qdm':
            self.no_user_devs = {d['name'] for d in infoQDM(vm) if d['no-user']}
            self.machines = [m['name'] for m in vm.command('query-machines')]
            self.user_devs = self.alldevs.difference(self.no_user_devs)
            self.kvm_available = vm.command('query-kvm')['enabled']
        finally:
            vm.shutdown()

    def machineInfo(self, machine):
        """Query for information on a specific machine-type

        Results are cached internally, in case the same machine-
        type is queried multiple times.

        Returns a dictionary with a single key, 'runnable', which is
        True if QEMU could be launched with "-machine <machine>".
        """
        if machine in self._machine_info:
            return self._machine_info[machine]

        mi = {}
        args = ['-S', '-machine', '%s' % (machine)]
        dbg("querying machine info for binary=%s machine=%s", self.binary, machine)
        vm = QEMUMachine(binary=self.binary, args=args)
        try:
            vm.launch()
            mi['runnable'] = True
        except Exception:
            # KeyboardInterrupt is not an Exception subclass, so it
            # still propagates to the caller.
            dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=sys.exc_info())
            dbg("log: %r", vm.get_log())
            mi['runnable'] = False
        finally:
            # shut down even if we are being interrupted
            vm.shutdown()

        self._machine_info[machine] = mi
        return mi


# Cache of QemuBinaryInfo objects, keyed by binary path
BINARY_INFO = {}


def getBinaryInfo(args, binary):
    """Return the (cached) QemuBinaryInfo for a QEMU binary

    args.devtype is used when the binary is queried the first time.
    """
    if binary not in BINARY_INFO:
        BINARY_INFO[binary] = QemuBinaryInfo(binary, args.devtype)
    return BINARY_INFO[binary]


def checkOneCase(args, testcase):
    """Check one specific case

    testcase must have the 'binary', 'accel', 'machine' and 'device'
    keys set.

    Returns a dictionary containing failure information on error,
    or None on success
    """
    binary = testcase['binary']
    accel = testcase['accel']
    machine = testcase['machine']
    device = testcase['device']

    dbg("will test: %r", testcase)

    qemu_args = ['-S', '-machine', '%s,accel=%s' % (machine, accel),
                 '-device', qemuOptsEscape(device)]
    cmdline = ' '.join([binary] + qemu_args)
    dbg("will launch QEMU: %s", cmdline)
    vm = QEMUMachine(binary=binary, args=qemu_args)

    exc_traceback = None
    try:
        vm.launch()
    except Exception:
        # KeyboardInterrupt is not an Exception subclass, so it
        # still propagates to the caller (after the finally block).
        exc_traceback = traceback.format_exc()
        dbg("Exception while running test case")
    finally:
        vm.shutdown()
        ec = vm.exitcode()
        log = vm.get_log()

    if exc_traceback is not None or ec != 0:
        return {'exc_traceback': exc_traceback,
                'exitcode': ec,
                'log': log,
                'testcase': testcase,
                'cmdline': cmdline}
    return None


def binariesToTest(args, testcase):
    """Return the list of QEMU binaries to test

    If no binary was given on the command line, look for executable
    qemu-system-* files in the current directory.
    """
    if args.qemu:
        return args.qemu
    return [f.path for f in os.scandir('.')
            if f.name.startswith('qemu-system-') and
            f.is_file() and os.access(f, os.X_OK)]


def accelsToTest(args, testcase):
    """Generate the accelerators to test: 'kvm' (if available), then 'tcg'"""
    if getBinaryInfo(args, testcase['binary']).kvm_available:
        yield 'kvm'
    yield 'tcg'


def machinesToTest(args, testcase):
    """Return the machine-types reported by the test case's binary"""
    return getBinaryInfo(args, testcase['binary']).machines


def devicesToTest(args, testcase):
    """Return the user-creatable devices reported by the test case's binary"""
    return getBinaryInfo(args, testcase['binary']).user_devs


# (variable name, generator function) pairs, in expansion order.
# Later functions read testcase['binary'], so 'binary' must be first.
TESTCASE_VARIABLES = [
    ('binary', binariesToTest),
    ('accel', accelsToTest),
    ('machine', machinesToTest),
    ('device', devicesToTest),
]


def genCases1(args, testcases, var, fn):
    """Generate new testcases for one variable

    If an existing item already has a variable set, don't
    generate new items and just return it directly. This
    allows the "-t" command-line option to be used to choose
    a specific test case.
    """
    for testcase in testcases:
        if var in testcase:
            yield testcase.copy()
        else:
            for i in fn(args, testcase):
                t = testcase.copy()
                t[var] = i
                yield t


def genCases(args, testcase):
    """Generate test cases for all variables

    Returns a lazy chain of generators; nothing is queried until the
    result is iterated.
    """
    cases = [testcase.copy()]
    for var, fn in TESTCASE_VARIABLES:
        dbg("var: %r, fn: %r", var, fn)
        cases = genCases1(args, cases, var, fn)
    return cases


def casesToTest(args, testcase):
    """Return the test cases to run, honoring --random and --shuffle

    The result stays a lazy generator unless --random, --debug or
    --shuffle require the full list.
    """
    cases = genCases(args, testcase)
    if args.random:
        cases = list(cases)
        cases = random.sample(cases, min(args.random, len(cases)))
    if args.debug:
        cases = list(cases)
        dbg("%d test cases to test", len(cases))
    if args.shuffle:
        cases = list(cases)
        random.shuffle(cases)
    return cases


def logFailure(f, level):
    """Log a failure dictionary (see checkOneCase()) at the given level"""
    t = f['testcase']
    logger.log(level, "failed: %s", formatTestCase(t))
    logger.log(level, "cmdline: %s", f['cmdline'])
    for line in f['log'].strip().split('\n'):
        logger.log(level, "log: %s", line)
    logger.log(level, "exit code: %r", f['exitcode'])
    if f['exc_traceback']:
        logger.log(level, "exception:")
        for line in f['exc_traceback'].split('\n'):
            logger.log(level, "  %s", line.rstrip('\n'))


def main():
    """Parse the command line, run the test cases and report results

    Returns the process exit status: 1 if no QEMU binary was found or
    if there were fatal failures, 0 otherwise.
    """
    parser = argparse.ArgumentParser(description="QEMU -device crash test")
    parser.add_argument('-t', metavar='KEY=VALUE', nargs='*',
                        help="Limit test cases to KEY=VALUE",
                        action='append', dest='testcases', default=[])
    parser.add_argument('-d', '--debug', action='store_true',
                        help='debug output')
    parser.add_argument('-v', '--verbose', action='store_true', default=True,
                        help='verbose output')
    parser.add_argument('-q', '--quiet', dest='verbose', action='store_false',
                        help='non-verbose output')
    parser.add_argument('-r', '--random', type=int, metavar='COUNT',
                        help='run a random sample of COUNT test cases',
                        default=0)
    parser.add_argument('--shuffle', action='store_true',
                        help='Run test cases in random order')
    parser.add_argument('--dry-run', action='store_true',
                        help="Don't run any tests, just generate list")
    parser.add_argument('-D', '--devtype', metavar='TYPE',
                        help="Test only device types that implement TYPE")
    parser.add_argument('-Q', '--quick', action='store_true', default=True,
                        help="Quick mode: skip test cases that are expected to fail")
    parser.add_argument('-F', '--full', action='store_false', dest='quick',
                        help="Full mode: test cases that are expected to fail")
    parser.add_argument('--strict', action='store_true', dest='strict',
                        help="Treat all warnings as fatal")
    parser.add_argument('qemu', nargs='*', metavar='QEMU',
                        help='QEMU binary to run')
    args = parser.parse_args()

    if args.debug:
        lvl = logging.DEBUG
    elif args.verbose:
        lvl = logging.INFO
    else:
        lvl = logging.WARNING
    logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s')

    fatal_failures = []
    # failures grouped by the index of the matching rule in ERROR_RULE_LIST
    wl_stats = {}
    skipped = 0
    total = 0

    # Template test case built from the -t options.  args.testcases is
    # a list of lists (one per -t occurrence), and each item may itself
    # contain several whitespace-separated KEY=VALUE pairs.
    tc = {}
    dbg("testcases: %r", args.testcases)
    for item in chain.from_iterable(args.testcases):
        for kv in item.split():
            if '=' not in kv:
                parser.error("invalid -t argument %r: expected KEY=VALUE" % (kv,))
            k, v = kv.split('=', 1)
            tc[k] = v

    if not binariesToTest(args, tc):
        print("No QEMU binary found", file=sys.stderr)
        parser.print_usage(sys.stderr)
        return 1

    for t in casesToTest(args, tc):
        logger.info("running test case: %s", formatTestCase(t))
        total += 1

        expected_match = findExpectedResult(t)
        if (args.quick and
                (expected_match or
                 not getBinaryInfo(args, t['binary']).machineInfo(t['machine'])['runnable'])):
            dbg("skipped: %s", formatTestCase(t))
            skipped += 1
            continue

        if args.dry_run:
            continue

        try:
            f = checkOneCase(args, t)
        except KeyboardInterrupt:
            # stop testing, but still print the summary below
            break

        if f:
            i, rule = checkResultRuleList(f)
            dbg("testcase: %r, rule list match: %r", t, rule)
            wl_stats.setdefault(i, []).append(f)
            level = rule.get('loglevel', logging.DEBUG)
            logFailure(f, level)
            if rule.get('fatal') or (args.strict and level >= logging.WARNING):
                fatal_failures.append(f)
        else:
            dbg("success: %s", formatTestCase(t))
            if expected_match:
                logger.warning("Didn't fail as expected: %s", formatTestCase(t))

    logger.info("Total: %d test cases", total)
    if skipped:
        logger.info("Skipped %d test cases", skipped)

    if args.debug:
        stats = sorted(((len(wl_stats.get(i, [])), rule)
                        for i, rule in enumerate(ERROR_RULE_LIST)),
                       key=lambda x: x[0])
        for count, rule in stats:
            dbg("error rule stats: %d: %r", count, rule)

    if fatal_failures:
        for f in fatal_failures:
            t = f['testcase']
            logger.error("Fatal failure: %s", formatTestCase(t))
        logger.error("Fatal failures on some machine/device combinations")
        return 1

    return 0


if __name__ == '__main__':
    sys.exit(main())