1# 2# Copyright (C) 2013 Intel Corporation 3# 4# SPDX-License-Identifier: MIT 5# 6 7# This module provides a class for starting qemu images using runqemu. 8# It's used by testimage.bbclass. 9 10import subprocess 11import os 12import sys 13import time 14import signal 15import re 16import socket 17import select 18import errno 19import string 20import threading 21import codecs 22import logging 23from oeqa.utils.dump import HostDumper 24 25# Get Unicode non printable control chars 26control_range = list(range(0,32))+list(range(127,160)) 27control_chars = [chr(x) for x in control_range 28 if chr(x) not in string.printable] 29re_control_char = re.compile('[%s]' % re.escape("".join(control_chars))) 30 31class QemuRunner: 32 33 def __init__(self, machine, rootfs, display, tmpdir, deploy_dir_image, logfile, boottime, dump_dir, dump_host_cmds, 34 use_kvm, logger, use_slirp=False): 35 36 # Popen object for runqemu 37 self.runqemu = None 38 # pid of the qemu process that runqemu will start 39 self.qemupid = None 40 # target ip - from the command line or runqemu output 41 self.ip = None 42 # host ip - where qemu is running 43 self.server_ip = None 44 # target ip netmask 45 self.netmask = None 46 47 self.machine = machine 48 self.rootfs = rootfs 49 self.display = display 50 self.tmpdir = tmpdir 51 self.deploy_dir_image = deploy_dir_image 52 self.logfile = logfile 53 self.boottime = boottime 54 self.logged = False 55 self.thread = None 56 self.use_kvm = use_kvm 57 self.use_slirp = use_slirp 58 self.msg = '' 59 60 self.runqemutime = 120 61 self.qemu_pidfile = 'pidfile_'+str(os.getpid()) 62 self.host_dumper = HostDumper(dump_host_cmds, dump_dir) 63 self.monitorpipe = None 64 65 self.logger = logger 66 67 def create_socket(self): 68 try: 69 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 70 sock.setblocking(0) 71 sock.bind(("127.0.0.1",0)) 72 sock.listen(2) 73 port = sock.getsockname()[1] 74 self.logger.debug("Created listening socket for qemu serial console on: 127.0.0.1:%s" % port) 75 return (sock, port) 76 77 except socket.error: 78 sock.close() 79 raise 80 81 def log(self, msg): 82 if self.logfile: 83 # It is needed to sanitize the data received from qemu 84 # because is possible to have control characters 85 msg = msg.decode("utf-8", errors='ignore') 86 msg = re_control_char.sub('', msg) 87 self.msg += msg 88 with codecs.open(self.logfile, "a", encoding="utf-8") as f: 89 f.write("%s" % msg) 90 91 def getOutput(self, o): 92 import fcntl 93 fl = fcntl.fcntl(o, fcntl.F_GETFL) 94 fcntl.fcntl(o, fcntl.F_SETFL, fl | os.O_NONBLOCK) 95 return os.read(o.fileno(), 1000000).decode("utf-8") 96 97 98 def handleSIGCHLD(self, signum, frame): 99 if self.runqemu and self.runqemu.poll(): 100 if self.runqemu.returncode: 101 self.logger.debug('runqemu exited with code %d' % self.runqemu.returncode) 102 self.logger.debug("Output from runqemu:\n%s" % self.getOutput(self.runqemu.stdout)) 103 self.stop() 104 self._dump_host() 105 raise SystemExit 106 107 def start(self, qemuparams = None, get_ip = True, extra_bootparams = None, runqemuparams='', launch_cmd=None, discard_writes=True): 108 env = os.environ.copy() 109 if self.display: 110 env["DISPLAY"] = self.display 111 # Set this flag so that Qemu doesn't do any grabs as SDL grabs 112 # interact badly with screensavers. 113 env["QEMU_DONT_GRAB"] = "1" 114 if not os.path.exists(self.rootfs): 115 self.logger.error("Invalid rootfs %s" % self.rootfs) 116 return False 117 if not os.path.exists(self.tmpdir): 118 self.logger.error("Invalid TMPDIR path %s" % self.tmpdir) 119 return False 120 else: 121 env["OE_TMPDIR"] = self.tmpdir 122 if not os.path.exists(self.deploy_dir_image): 123 self.logger.error("Invalid DEPLOY_DIR_IMAGE path %s" % self.deploy_dir_image) 124 return False 125 else: 126 env["DEPLOY_DIR_IMAGE"] = self.deploy_dir_image 127 128 if not launch_cmd: 129 launch_cmd = 'runqemu %s %s ' % ('snapshot' if discard_writes else '', runqemuparams) 130 if self.use_kvm: 131 self.logger.debug('Using kvm for runqemu') 132 launch_cmd += ' kvm' 133 else: 134 self.logger.debug('Not using kvm for runqemu') 135 if not self.display: 136 launch_cmd += ' nographic' 137 if self.use_slirp: 138 launch_cmd += ' slirp' 139 launch_cmd += ' %s %s' % (self.machine, self.rootfs) 140 141 return self.launch(launch_cmd, qemuparams=qemuparams, get_ip=get_ip, extra_bootparams=extra_bootparams, env=env) 142 143 def launch(self, launch_cmd, get_ip = True, qemuparams = None, extra_bootparams = None, env = None): 144 try: 145 self.threadsock, threadport = self.create_socket() 146 self.server_socket, self.serverport = self.create_socket() 147 except socket.error as msg: 148 self.logger.error("Failed to create listening socket: %s" % msg[1]) 149 return False 150 151 bootparams = 'console=tty1 console=ttyS0,115200n8 printk.time=1' 152 if extra_bootparams: 153 bootparams = bootparams + ' ' + extra_bootparams 154 155 # Ask QEMU to store the QEMU process PID in file, this way we don't have to parse running processes 156 # and analyze descendents in order to determine it. 157 if os.path.exists(self.qemu_pidfile): 158 os.remove(self.qemu_pidfile) 159 self.qemuparams = 'bootparams="{0}" qemuparams="-pidfile {1}"'.format(bootparams, self.qemu_pidfile) 160 if qemuparams: 161 self.qemuparams = self.qemuparams[:-1] + " " + qemuparams + " " + '\"' 162 163 launch_cmd += ' tcpserial=%s:%s %s' % (threadport, self.serverport, self.qemuparams) 164 165 self.origchldhandler = signal.getsignal(signal.SIGCHLD) 166 signal.signal(signal.SIGCHLD, self.handleSIGCHLD) 167 168 self.logger.debug('launchcmd=%s'%(launch_cmd)) 169 170 # FIXME: We pass in stdin=subprocess.PIPE here to work around stty 171 # blocking at the end of the runqemu script when using this within 172 # oe-selftest (this makes stty error out immediately). There ought 173 # to be a proper fix but this will suffice for now. 174 self.runqemu = subprocess.Popen(launch_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE, preexec_fn=os.setpgrp, env=env) 175 output = self.runqemu.stdout 176 177 # 178 # We need the preexec_fn above so that all runqemu processes can easily be killed 179 # (by killing their process group). This presents a problem if this controlling 180 # process itself is killed however since those processes don't notice the death 181 # of the parent and merrily continue on. 182 # 183 # Rather than hack runqemu to deal with this, we add something here instead. 184 # Basically we fork off another process which holds an open pipe to the parent 185 # and also is setpgrp. If/when the pipe sees EOF from the parent dieing, it kills 186 # the process group. This is like pctrl's PDEATHSIG but for a process group 187 # rather than a single process. 188 # 189 r, w = os.pipe() 190 self.monitorpid = os.fork() 191 if self.monitorpid: 192 os.close(r) 193 self.monitorpipe = os.fdopen(w, "w") 194 else: 195 # child process 196 os.setpgrp() 197 os.close(w) 198 r = os.fdopen(r) 199 x = r.read() 200 os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM) 201 sys.exit(0) 202 203 self.logger.debug("runqemu started, pid is %s" % self.runqemu.pid) 204 self.logger.debug("waiting at most %s seconds for qemu pid (%s)" % 205 (self.runqemutime, time.strftime("%D %H:%M:%S"))) 206 endtime = time.time() + self.runqemutime 207 while not self.is_alive() and time.time() < endtime: 208 if self.runqemu.poll(): 209 if self.runqemu.returncode: 210 # No point waiting any longer 211 self.logger.debug('runqemu exited with code %d' % self.runqemu.returncode) 212 self._dump_host() 213 self.logger.debug("Output from runqemu:\n%s" % self.getOutput(output)) 214 self.stop() 215 return False 216 time.sleep(0.5) 217 218 if not self.is_alive(): 219 self.logger.error("Qemu pid didn't appear in %s seconds (%s)" % 220 (self.runqemutime, time.strftime("%D %H:%M:%S"))) 221 # Dump all processes to help us to figure out what is going on... 222 ps = subprocess.Popen(['ps', 'axww', '-o', 'pid,ppid,command '], stdout=subprocess.PIPE).communicate()[0] 223 processes = ps.decode("utf-8") 224 self.logger.debug("Running processes:\n%s" % processes) 225 self._dump_host() 226 op = self.getOutput(output) 227 self.stop() 228 if op: 229 self.logger.error("Output from runqemu:\n%s" % op) 230 else: 231 self.logger.error("No output from runqemu.\n") 232 return False 233 234 # We are alive: qemu is running 235 out = self.getOutput(output) 236 netconf = False # network configuration is not required by default 237 self.logger.debug("qemu started in %s seconds - qemu procces pid is %s (%s)" % 238 (time.time() - (endtime - self.runqemutime), 239 self.qemupid, time.strftime("%D %H:%M:%S"))) 240 if get_ip: 241 cmdline = '' 242 with open('/proc/%s/cmdline' % self.qemupid) as p: 243 cmdline = p.read() 244 # It is needed to sanitize the data received 245 # because is possible to have control characters 246 cmdline = re_control_char.sub(' ', cmdline) 247 try: 248 if self.use_slirp: 249 tcp_ports = cmdline.split("hostfwd=tcp::")[1] 250 host_port = tcp_ports[:tcp_ports.find('-')] 251 self.ip = "localhost:%s" % host_port 252 else: 253 ips = re.findall(r"((?:[0-9]{1,3}\.){3}[0-9]{1,3})", cmdline.split("ip=")[1]) 254 self.ip = ips[0] 255 self.server_ip = ips[1] 256 self.logger.debug("qemu cmdline used:\n{}".format(cmdline)) 257 except (IndexError, ValueError): 258 # Try to get network configuration from runqemu output 259 match = re.match(r'.*Network configuration: ([0-9.]+)::([0-9.]+):([0-9.]+)$.*', 260 out, re.MULTILINE|re.DOTALL) 261 if match: 262 self.ip, self.server_ip, self.netmask = match.groups() 263 # network configuration is required as we couldn't get it 264 # from the runqemu command line, so qemu doesn't run kernel 265 # and guest networking is not configured 266 netconf = True 267 else: 268 self.logger.error("Couldn't get ip from qemu command line and runqemu output! " 269 "Here is the qemu command line used:\n%s\n" 270 "and output from runqemu:\n%s" % (cmdline, out)) 271 self._dump_host() 272 self.stop() 273 return False 274 275 self.logger.debug("Target IP: %s" % self.ip) 276 self.logger.debug("Server IP: %s" % self.server_ip) 277 278 self.thread = LoggingThread(self.log, self.threadsock, self.logger) 279 self.thread.start() 280 if not self.thread.connection_established.wait(self.boottime): 281 self.logger.error("Didn't receive a console connection from qemu. " 282 "Here is the qemu command line used:\n%s\nand " 283 "output from runqemu:\n%s" % (cmdline, out)) 284 self.stop_thread() 285 return False 286 287 self.logger.debug("Output from runqemu:\n%s", out) 288 self.logger.debug("Waiting at most %d seconds for login banner (%s)" % 289 (self.boottime, time.strftime("%D %H:%M:%S"))) 290 endtime = time.time() + self.boottime 291 socklist = [self.server_socket] 292 reachedlogin = False 293 stopread = False 294 qemusock = None 295 bootlog = b'' 296 data = b'' 297 while time.time() < endtime and not stopread: 298 try: 299 sread, swrite, serror = select.select(socklist, [], [], 5) 300 except InterruptedError: 301 continue 302 for sock in sread: 303 if sock is self.server_socket: 304 qemusock, addr = self.server_socket.accept() 305 qemusock.setblocking(0) 306 socklist.append(qemusock) 307 socklist.remove(self.server_socket) 308 self.logger.debug("Connection from %s:%s" % addr) 309 else: 310 data = data + sock.recv(1024) 311 if data: 312 bootlog += data 313 data = b'' 314 if b' login:' in bootlog: 315 self.server_socket = qemusock 316 stopread = True 317 reachedlogin = True 318 self.logger.debug("Reached login banner in %s seconds (%s)" % 319 (time.time() - (endtime - self.boottime), 320 time.strftime("%D %H:%M:%S"))) 321 else: 322 # no need to check if reachedlogin unless we support multiple connections 323 self.logger.debug("QEMU socket disconnected before login banner reached. (%s)" % 324 time.strftime("%D %H:%M:%S")) 325 socklist.remove(sock) 326 sock.close() 327 stopread = True 328 329 330 if not reachedlogin: 331 if time.time() >= endtime: 332 self.logger.debug("Target didn't reach login banner in %d seconds (%s)" % 333 (self.boottime, time.strftime("%D %H:%M:%S"))) 334 tail = lambda l: "\n".join(l.splitlines()[-25:]) 335 bootlog = bootlog.decode("utf-8") 336 # in case bootlog is empty, use tail qemu log store at self.msg 337 lines = tail(bootlog if bootlog else self.msg) 338 self.logger.debug("Last 25 lines of text:\n%s" % lines) 339 self.logger.debug("Check full boot log: %s" % self.logfile) 340 self._dump_host() 341 self.stop() 342 return False 343 344 # If we are not able to login the tests can continue 345 try: 346 (status, output) = self.run_serial("root\n", raw=True) 347 if re.search(r"root@[a-zA-Z0-9\-]+:~#", output): 348 self.logged = True 349 self.logger.debug("Logged as root in serial console") 350 if netconf: 351 # configure guest networking 352 cmd = "ifconfig eth0 %s netmask %s up\n" % (self.ip, self.netmask) 353 output = self.run_serial(cmd, raw=True)[1] 354 if re.search(r"root@[a-zA-Z0-9\-]+:~#", output): 355 self.logger.debug("configured ip address %s", self.ip) 356 else: 357 self.logger.debug("Couldn't configure guest networking") 358 else: 359 self.logger.debug("Couldn't login into serial console" 360 " as root using blank password") 361 self.logger.debug("The output:\n%s" % output) 362 except: 363 self.logger.debug("Serial console failed while trying to login") 364 return True 365 366 def stop(self): 367 if hasattr(self, "origchldhandler"): 368 signal.signal(signal.SIGCHLD, self.origchldhandler) 369 self.stop_thread() 370 self.stop_qemu_system() 371 if self.runqemu: 372 if hasattr(self, "monitorpid"): 373 os.kill(self.monitorpid, signal.SIGKILL) 374 self.logger.debug("Sending SIGTERM to runqemu") 375 try: 376 os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM) 377 except OSError as e: 378 if e.errno != errno.ESRCH: 379 raise 380 endtime = time.time() + self.runqemutime 381 while self.runqemu.poll() is None and time.time() < endtime: 382 time.sleep(1) 383 if self.runqemu.poll() is None: 384 self.logger.debug("Sending SIGKILL to runqemu") 385 os.killpg(os.getpgid(self.runqemu.pid), signal.SIGKILL) 386 self.runqemu.stdin.close() 387 self.runqemu.stdout.close() 388 self.runqemu = None 389 390 if hasattr(self, 'server_socket') and self.server_socket: 391 self.server_socket.close() 392 self.server_socket = None 393 if hasattr(self, 'threadsock') and self.threadsock: 394 self.threadsock.close() 395 self.threadsock = None 396 self.qemupid = None 397 self.ip = None 398 if os.path.exists(self.qemu_pidfile): 399 os.remove(self.qemu_pidfile) 400 if self.monitorpipe: 401 self.monitorpipe.close() 402 403 def stop_qemu_system(self): 404 if self.qemupid: 405 try: 406 # qemu-system behaves well and a SIGTERM is enough 407 os.kill(self.qemupid, signal.SIGTERM) 408 except ProcessLookupError as e: 409 self.logger.warning('qemu-system ended unexpectedly') 410 411 def stop_thread(self): 412 if self.thread and self.thread.is_alive(): 413 self.thread.stop() 414 self.thread.join() 415 416 def restart(self, qemuparams = None): 417 self.logger.debug("Restarting qemu process") 418 if self.runqemu.poll() is None: 419 self.stop() 420 if self.start(qemuparams): 421 return True 422 return False 423 424 def is_alive(self): 425 if not self.runqemu or self.runqemu.poll() is not None: 426 return False 427 if os.path.isfile(self.qemu_pidfile): 428 f = open(self.qemu_pidfile, 'r') 429 qemu_pid = f.read() 430 f.close() 431 qemupid = int(qemu_pid) 432 if os.path.exists("/proc/" + str(qemupid)): 433 self.qemupid = qemupid 434 return True 435 return False 436 437 def run_serial(self, command, raw=False, timeout=60): 438 # We assume target system have echo to get command status 439 if not raw: 440 command = "%s; echo $?\n" % command 441 442 data = '' 443 status = 0 444 self.server_socket.sendall(command.encode('utf-8')) 445 start = time.time() 446 end = start + timeout 447 while True: 448 now = time.time() 449 if now >= end: 450 data += "<<< run_serial(): command timed out after %d seconds without output >>>\r\n\r\n" % timeout 451 break 452 try: 453 sread, _, _ = select.select([self.server_socket],[],[], end - now) 454 except InterruptedError: 455 continue 456 if sread: 457 answer = self.server_socket.recv(1024) 458 if answer: 459 data += answer.decode('utf-8') 460 # Search the prompt to stop 461 if re.search(r"[a-zA-Z0-9]+@[a-zA-Z0-9\-]+:~#", data): 462 break 463 else: 464 raise Exception("No data on serial console socket") 465 466 if data: 467 if raw: 468 status = 1 469 else: 470 # Remove first line (command line) and last line (prompt) 471 data = data[data.find('$?\r\n')+4:data.rfind('\r\n')] 472 index = data.rfind('\r\n') 473 if index == -1: 474 status_cmd = data 475 data = "" 476 else: 477 status_cmd = data[index+2:] 478 data = data[:index] 479 if (status_cmd == "0"): 480 status = 1 481 return (status, str(data)) 482 483 484 def _dump_host(self): 485 self.host_dumper.create_dir("qemu") 486 self.logger.warning("Qemu ended unexpectedly, dump data from host" 487 " is in %s" % self.host_dumper.dump_dir) 488 self.host_dumper.dump_host() 489 490# This class is for reading data from a socket and passing it to logfunc 491# to be processed. It's completely event driven and has a straightforward 492# event loop. The mechanism for stopping the thread is a simple pipe which 493# will wake up the poll and allow for tearing everything down. 494class LoggingThread(threading.Thread): 495 def __init__(self, logfunc, sock, logger): 496 self.connection_established = threading.Event() 497 self.serversock = sock 498 self.logfunc = logfunc 499 self.logger = logger 500 self.readsock = None 501 self.running = False 502 503 self.errorevents = select.POLLERR | select.POLLHUP | select.POLLNVAL 504 self.readevents = select.POLLIN | select.POLLPRI 505 506 threading.Thread.__init__(self, target=self.threadtarget) 507 508 def threadtarget(self): 509 try: 510 self.eventloop() 511 finally: 512 self.teardown() 513 514 def run(self): 515 self.logger.debug("Starting logging thread") 516 self.readpipe, self.writepipe = os.pipe() 517 threading.Thread.run(self) 518 519 def stop(self): 520 self.logger.debug("Stopping logging thread") 521 if self.running: 522 os.write(self.writepipe, bytes("stop", "utf-8")) 523 524 def teardown(self): 525 self.logger.debug("Tearing down logging thread") 526 self.close_socket(self.serversock) 527 528 if self.readsock is not None: 529 self.close_socket(self.readsock) 530 531 self.close_ignore_error(self.readpipe) 532 self.close_ignore_error(self.writepipe) 533 self.running = False 534 535 def eventloop(self): 536 poll = select.poll() 537 event_read_mask = self.errorevents | self.readevents 538 poll.register(self.serversock.fileno()) 539 poll.register(self.readpipe, event_read_mask) 540 541 breakout = False 542 self.running = True 543 self.logger.debug("Starting thread event loop") 544 while not breakout: 545 events = poll.poll() 546 for event in events: 547 # An error occurred, bail out 548 if event[1] & self.errorevents: 549 raise Exception(self.stringify_event(event[1])) 550 551 # Event to stop the thread 552 if self.readpipe == event[0]: 553 self.logger.debug("Stop event received") 554 breakout = True 555 break 556 557 # A connection request was received 558 elif self.serversock.fileno() == event[0]: 559 self.logger.debug("Connection request received") 560 self.readsock, _ = self.serversock.accept() 561 self.readsock.setblocking(0) 562 poll.unregister(self.serversock.fileno()) 563 poll.register(self.readsock.fileno(), event_read_mask) 564 565 self.logger.debug("Setting connection established event") 566 self.connection_established.set() 567 568 # Actual data to be logged 569 elif self.readsock.fileno() == event[0]: 570 data = self.recv(1024) 571 self.logfunc(data) 572 573 # Since the socket is non-blocking make sure to honor EAGAIN 574 # and EWOULDBLOCK. 575 def recv(self, count): 576 try: 577 data = self.readsock.recv(count) 578 except socket.error as e: 579 if e.errno == errno.EAGAIN or e.errno == errno.EWOULDBLOCK: 580 return '' 581 else: 582 raise 583 584 if data is None: 585 raise Exception("No data on read ready socket") 586 elif not data: 587 # This actually means an orderly shutdown 588 # happened. But for this code it counts as an 589 # error since the connection shouldn't go away 590 # until qemu exits. 591 raise Exception("Console connection closed unexpectedly") 592 593 return data 594 595 def stringify_event(self, event): 596 val = '' 597 if select.POLLERR == event: 598 val = 'POLLER' 599 elif select.POLLHUP == event: 600 val = 'POLLHUP' 601 elif select.POLLNVAL == event: 602 val = 'POLLNVAL' 603 return val 604 605 def close_socket(self, sock): 606 sock.shutdown(socket.SHUT_RDWR) 607 sock.close() 608 609 def close_ignore_error(self, fd): 610 try: 611 os.close(fd) 612 except OSError: 613 pass 614