1# 2# Copyright (C) 2013 Intel Corporation 3# 4# SPDX-License-Identifier: MIT 5# 6 7# This module provides a class for starting qemu images using runqemu. 8# It's used by testimage.bbclass. 9 10import subprocess 11import os 12import sys 13import time 14import signal 15import re 16import socket 17import select 18import errno 19import string 20import threading 21import codecs 22import logging 23from oeqa.utils.dump import HostDumper 24 25# Get Unicode non printable control chars 26control_range = list(range(0,32))+list(range(127,160)) 27control_chars = [chr(x) for x in control_range 28 if chr(x) not in string.printable] 29re_control_char = re.compile('[%s]' % re.escape("".join(control_chars))) 30 31class QemuRunner: 32 33 def __init__(self, machine, rootfs, display, tmpdir, deploy_dir_image, logfile, boottime, dump_dir, dump_host_cmds, 34 use_kvm, logger, use_slirp=False): 35 36 # Popen object for runqemu 37 self.runqemu = None 38 # pid of the qemu process that runqemu will start 39 self.qemupid = None 40 # target ip - from the command line or runqemu output 41 self.ip = None 42 # host ip - where qemu is running 43 self.server_ip = None 44 # target ip netmask 45 self.netmask = None 46 47 self.machine = machine 48 self.rootfs = rootfs 49 self.display = display 50 self.tmpdir = tmpdir 51 self.deploy_dir_image = deploy_dir_image 52 self.logfile = logfile 53 self.boottime = boottime 54 self.logged = False 55 self.thread = None 56 self.use_kvm = use_kvm 57 self.use_slirp = use_slirp 58 self.msg = '' 59 60 self.runqemutime = 120 61 self.qemu_pidfile = 'pidfile_'+str(os.getpid()) 62 self.host_dumper = HostDumper(dump_host_cmds, dump_dir) 63 self.monitorpipe = None 64 65 self.logger = logger 66 67 def create_socket(self): 68 try: 69 sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 70 sock.setblocking(0) 71 sock.bind(("127.0.0.1",0)) 72 sock.listen(2) 73 port = sock.getsockname()[1] 74 self.logger.debug("Created listening socket for qemu serial console on: 127.0.0.1:%s" % port) 75 return (sock, port) 76 77 except socket.error: 78 sock.close() 79 raise 80 81 def log(self, msg): 82 if self.logfile: 83 # It is needed to sanitize the data received from qemu 84 # because is possible to have control characters 85 msg = msg.decode("utf-8", errors='ignore') 86 msg = re_control_char.sub('', msg) 87 self.msg += msg 88 with codecs.open(self.logfile, "a", encoding="utf-8") as f: 89 f.write("%s" % msg) 90 91 def getOutput(self, o): 92 import fcntl 93 fl = fcntl.fcntl(o, fcntl.F_GETFL) 94 fcntl.fcntl(o, fcntl.F_SETFL, fl | os.O_NONBLOCK) 95 return os.read(o.fileno(), 1000000).decode("utf-8") 96 97 98 def handleSIGCHLD(self, signum, frame): 99 if self.runqemu and self.runqemu.poll(): 100 if self.runqemu.returncode: 101 self.logger.warning('runqemu exited with code %d' % self.runqemu.returncode) 102 self.logger.debug("Output from runqemu:\n%s" % self.getOutput(self.runqemu.stdout)) 103 self.stop() 104 self._dump_host() 105 raise SystemExit 106 107 def start(self, qemuparams = None, get_ip = True, extra_bootparams = None, runqemuparams='', launch_cmd=None, discard_writes=True): 108 env = os.environ.copy() 109 if self.display: 110 env["DISPLAY"] = self.display 111 # Set this flag so that Qemu doesn't do any grabs as SDL grabs 112 # interact badly with screensavers. 113 env["QEMU_DONT_GRAB"] = "1" 114 if not os.path.exists(self.rootfs): 115 self.logger.error("Invalid rootfs %s" % self.rootfs) 116 return False 117 if not os.path.exists(self.tmpdir): 118 self.logger.error("Invalid TMPDIR path %s" % self.tmpdir) 119 return False 120 else: 121 env["OE_TMPDIR"] = self.tmpdir 122 if not os.path.exists(self.deploy_dir_image): 123 self.logger.error("Invalid DEPLOY_DIR_IMAGE path %s" % self.deploy_dir_image) 124 return False 125 else: 126 env["DEPLOY_DIR_IMAGE"] = self.deploy_dir_image 127 128 if not launch_cmd: 129 launch_cmd = 'runqemu %s' % ('snapshot' if discard_writes else '') 130 if self.use_kvm: 131 self.logger.debug('Using kvm for runqemu') 132 launch_cmd += ' kvm' 133 else: 134 self.logger.debug('Not using kvm for runqemu') 135 if not self.display: 136 launch_cmd += ' nographic' 137 if self.use_slirp: 138 launch_cmd += ' slirp' 139 launch_cmd += ' %s %s %s' % (runqemuparams, self.machine, self.rootfs) 140 141 return self.launch(launch_cmd, qemuparams=qemuparams, get_ip=get_ip, extra_bootparams=extra_bootparams, env=env) 142 143 def launch(self, launch_cmd, get_ip = True, qemuparams = None, extra_bootparams = None, env = None): 144 try: 145 self.threadsock, threadport = self.create_socket() 146 self.server_socket, self.serverport = self.create_socket() 147 except socket.error as msg: 148 self.logger.error("Failed to create listening socket: %s" % msg[1]) 149 return False 150 151 bootparams = 'console=tty1 console=ttyS0,115200n8 printk.time=1' 152 if extra_bootparams: 153 bootparams = bootparams + ' ' + extra_bootparams 154 155 # Ask QEMU to store the QEMU process PID in file, this way we don't have to parse running processes 156 # and analyze descendents in order to determine it. 157 if os.path.exists(self.qemu_pidfile): 158 os.remove(self.qemu_pidfile) 159 self.qemuparams = 'bootparams="{0}" qemuparams="-pidfile {1}"'.format(bootparams, self.qemu_pidfile) 160 if qemuparams: 161 self.qemuparams = self.qemuparams[:-1] + " " + qemuparams + " " + '\"' 162 163 launch_cmd += ' tcpserial=%s:%s %s' % (threadport, self.serverport, self.qemuparams) 164 165 self.origchldhandler = signal.getsignal(signal.SIGCHLD) 166 signal.signal(signal.SIGCHLD, self.handleSIGCHLD) 167 168 self.logger.debug('launchcmd=%s'%(launch_cmd)) 169 170 # FIXME: We pass in stdin=subprocess.PIPE here to work around stty 171 # blocking at the end of the runqemu script when using this within 172 # oe-selftest (this makes stty error out immediately). There ought 173 # to be a proper fix but this will suffice for now. 174 self.runqemu = subprocess.Popen(launch_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE, preexec_fn=os.setpgrp, env=env) 175 output = self.runqemu.stdout 176 177 # 178 # We need the preexec_fn above so that all runqemu processes can easily be killed 179 # (by killing their process group). This presents a problem if this controlling 180 # process itself is killed however since those processes don't notice the death 181 # of the parent and merrily continue on. 182 # 183 # Rather than hack runqemu to deal with this, we add something here instead. 184 # Basically we fork off another process which holds an open pipe to the parent 185 # and also is setpgrp. If/when the pipe sees EOF from the parent dieing, it kills 186 # the process group. This is like pctrl's PDEATHSIG but for a process group 187 # rather than a single process. 188 # 189 r, w = os.pipe() 190 self.monitorpid = os.fork() 191 if self.monitorpid: 192 os.close(r) 193 self.monitorpipe = os.fdopen(w, "w") 194 else: 195 # child process 196 os.setpgrp() 197 os.close(w) 198 r = os.fdopen(r) 199 x = r.read() 200 os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM) 201 sys.exit(0) 202 203 self.logger.debug("runqemu started, pid is %s" % self.runqemu.pid) 204 self.logger.debug("waiting at most %s seconds for qemu pid (%s)" % 205 (self.runqemutime, time.strftime("%D %H:%M:%S"))) 206 endtime = time.time() + self.runqemutime 207 while not self.is_alive() and time.time() < endtime: 208 if self.runqemu.poll(): 209 if self.runqemu.returncode: 210 # No point waiting any longer 211 self.logger.warning('runqemu exited with code %d' % self.runqemu.returncode) 212 self._dump_host() 213 self.logger.warning("Output from runqemu:\n%s" % self.getOutput(output)) 214 self.stop() 215 return False 216 time.sleep(0.5) 217 218 if not self.is_alive(): 219 self.logger.error("Qemu pid didn't appear in %s seconds (%s)" % 220 (self.runqemutime, time.strftime("%D %H:%M:%S"))) 221 # Dump all processes to help us to figure out what is going on... 222 ps = subprocess.Popen(['ps', 'axww', '-o', 'pid,ppid,command '], stdout=subprocess.PIPE).communicate()[0] 223 processes = ps.decode("utf-8") 224 self.logger.debug("Running processes:\n%s" % processes) 225 self._dump_host() 226 op = self.getOutput(output) 227 self.stop() 228 if op: 229 self.logger.error("Output from runqemu:\n%s" % op) 230 else: 231 self.logger.error("No output from runqemu.\n") 232 return False 233 234 # We are alive: qemu is running 235 out = self.getOutput(output) 236 netconf = False # network configuration is not required by default 237 self.logger.debug("qemu started in %s seconds - qemu procces pid is %s (%s)" % 238 (time.time() - (endtime - self.runqemutime), 239 self.qemupid, time.strftime("%D %H:%M:%S"))) 240 if get_ip: 241 cmdline = '' 242 with open('/proc/%s/cmdline' % self.qemupid) as p: 243 cmdline = p.read() 244 # It is needed to sanitize the data received 245 # because is possible to have control characters 246 cmdline = re_control_char.sub(' ', cmdline) 247 try: 248 if self.use_slirp: 249 tcp_ports = cmdline.split("hostfwd=tcp::")[1] 250 host_port = tcp_ports[:tcp_ports.find('-')] 251 self.ip = "localhost:%s" % host_port 252 else: 253 ips = re.findall(r"((?:[0-9]{1,3}\.){3}[0-9]{1,3})", cmdline.split("ip=")[1]) 254 self.ip = ips[0] 255 self.server_ip = ips[1] 256 self.logger.debug("qemu cmdline used:\n{}".format(cmdline)) 257 except (IndexError, ValueError): 258 # Try to get network configuration from runqemu output 259 match = re.match(r'.*Network configuration: ([0-9.]+)::([0-9.]+):([0-9.]+)$.*', 260 out, re.MULTILINE|re.DOTALL) 261 if match: 262 self.ip, self.server_ip, self.netmask = match.groups() 263 # network configuration is required as we couldn't get it 264 # from the runqemu command line, so qemu doesn't run kernel 265 # and guest networking is not configured 266 netconf = True 267 else: 268 self.logger.error("Couldn't get ip from qemu command line and runqemu output! " 269 "Here is the qemu command line used:\n%s\n" 270 "and output from runqemu:\n%s" % (cmdline, out)) 271 self._dump_host() 272 self.stop() 273 return False 274 275 self.logger.debug("Target IP: %s" % self.ip) 276 self.logger.debug("Server IP: %s" % self.server_ip) 277 278 self.thread = LoggingThread(self.log, self.threadsock, self.logger) 279 self.thread.start() 280 if not self.thread.connection_established.wait(self.boottime): 281 self.logger.error("Didn't receive a console connection from qemu. " 282 "Here is the qemu command line used:\n%s\nand " 283 "output from runqemu:\n%s" % (cmdline, out)) 284 self.stop_thread() 285 return False 286 287 self.logger.debug("Output from runqemu:\n%s", out) 288 self.logger.debug("Waiting at most %d seconds for login banner (%s)" % 289 (self.boottime, time.strftime("%D %H:%M:%S"))) 290 endtime = time.time() + self.boottime 291 socklist = [self.server_socket] 292 reachedlogin = False 293 stopread = False 294 qemusock = None 295 bootlog = b'' 296 data = b'' 297 while time.time() < endtime and not stopread: 298 try: 299 sread, swrite, serror = select.select(socklist, [], [], 5) 300 except InterruptedError: 301 continue 302 for sock in sread: 303 if sock is self.server_socket: 304 qemusock, addr = self.server_socket.accept() 305 qemusock.setblocking(0) 306 socklist.append(qemusock) 307 socklist.remove(self.server_socket) 308 self.logger.debug("Connection from %s:%s" % addr) 309 else: 310 data = data + sock.recv(1024) 311 if data: 312 bootlog += data 313 data = b'' 314 if b' login:' in bootlog: 315 self.server_socket = qemusock 316 stopread = True 317 reachedlogin = True 318 self.logger.debug("Reached login banner in %s seconds (%s)" % 319 (time.time() - (endtime - self.boottime), 320 time.strftime("%D %H:%M:%S"))) 321 else: 322 # no need to check if reachedlogin unless we support multiple connections 323 self.logger.debug("QEMU socket disconnected before login banner reached. (%s)" % 324 time.strftime("%D %H:%M:%S")) 325 socklist.remove(sock) 326 sock.close() 327 stopread = True 328 329 330 if not reachedlogin: 331 if time.time() >= endtime: 332 self.logger.warning("Target didn't reach login banner in %d seconds (%s)" % 333 (self.boottime, time.strftime("%D %H:%M:%S"))) 334 tail = lambda l: "\n".join(l.splitlines()[-25:]) 335 bootlog = bootlog.decode("utf-8") 336 # in case bootlog is empty, use tail qemu log store at self.msg 337 lines = tail(bootlog if bootlog else self.msg) 338 self.logger.warning("Last 25 lines of text:\n%s" % lines) 339 self.logger.warning("Check full boot log: %s" % self.logfile) 340 self._dump_host() 341 self.stop() 342 return False 343 344 # If we are not able to login the tests can continue 345 try: 346 (status, output) = self.run_serial("root\n", raw=True) 347 if re.search(r"root@[a-zA-Z0-9\-]+:~#", output): 348 self.logged = True 349 self.logger.debug("Logged as root in serial console") 350 if netconf: 351 # configure guest networking 352 cmd = "ifconfig eth0 %s netmask %s up\n" % (self.ip, self.netmask) 353 output = self.run_serial(cmd, raw=True)[1] 354 if re.search(r"root@[a-zA-Z0-9\-]+:~#", output): 355 self.logger.debug("configured ip address %s", self.ip) 356 else: 357 self.logger.debug("Couldn't configure guest networking") 358 else: 359 self.logger.warning("Couldn't login into serial console" 360 " as root using blank password") 361 self.logger.warning("The output:\n%s" % output) 362 except: 363 self.logger.warning("Serial console failed while trying to login") 364 return True 365 366 def stop(self): 367 if hasattr(self, "origchldhandler"): 368 signal.signal(signal.SIGCHLD, self.origchldhandler) 369 self.stop_thread() 370 self.stop_qemu_system() 371 if self.runqemu: 372 if hasattr(self, "monitorpid"): 373 os.kill(self.monitorpid, signal.SIGKILL) 374 self.logger.debug("Sending SIGTERM to runqemu") 375 try: 376 os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM) 377 except OSError as e: 378 if e.errno != errno.ESRCH: 379 raise 380 endtime = time.time() + self.runqemutime 381 while self.runqemu.poll() is None and time.time() < endtime: 382 time.sleep(1) 383 if self.runqemu.poll() is None: 384 self.logger.debug("Sending SIGKILL to runqemu") 385 os.killpg(os.getpgid(self.runqemu.pid), signal.SIGKILL) 386 self.runqemu.stdin.close() 387 self.runqemu.stdout.close() 388 self.runqemu = None 389 390 if hasattr(self, 'server_socket') and self.server_socket: 391 self.server_socket.close() 392 self.server_socket = None 393 if hasattr(self, 'threadsock') and self.threadsock: 394 self.threadsock.close() 395 self.threadsock = None 396 self.qemupid = None 397 self.ip = None 398 if os.path.exists(self.qemu_pidfile): 399 os.remove(self.qemu_pidfile) 400 if self.monitorpipe: 401 self.monitorpipe.close() 402 403 def stop_qemu_system(self): 404 if self.qemupid: 405 try: 406 # qemu-system behaves well and a SIGTERM is enough 407 os.kill(self.qemupid, signal.SIGTERM) 408 except ProcessLookupError as e: 409 self.logger.warning('qemu-system ended unexpectedly') 410 411 def stop_thread(self): 412 if self.thread and self.thread.is_alive(): 413 self.thread.stop() 414 self.thread.join() 415 416 def restart(self, qemuparams = None): 417 self.logger.warning("Restarting qemu process") 418 if self.runqemu.poll() is None: 419 self.stop() 420 if self.start(qemuparams): 421 return True 422 return False 423 424 def is_alive(self): 425 if not self.runqemu or self.runqemu.poll() is not None: 426 return False 427 if os.path.isfile(self.qemu_pidfile): 428 # when handling pidfile, qemu creates the file, stat it, lock it and then write to it 429 # so it's possible that the file has been created but the content is empty 430 pidfile_timeout = time.time() + 3 431 while time.time() < pidfile_timeout: 432 with open(self.qemu_pidfile, 'r') as f: 433 qemu_pid = f.read().strip() 434 # file created but not yet written contents 435 if not qemu_pid: 436 time.sleep(0.5) 437 continue 438 else: 439 if os.path.exists("/proc/" + qemu_pid): 440 self.qemupid = int(qemu_pid) 441 return True 442 return False 443 444 def run_serial(self, command, raw=False, timeout=60): 445 # We assume target system have echo to get command status 446 if not raw: 447 command = "%s; echo $?\n" % command 448 449 data = '' 450 status = 0 451 self.server_socket.sendall(command.encode('utf-8')) 452 start = time.time() 453 end = start + timeout 454 while True: 455 now = time.time() 456 if now >= end: 457 data += "<<< run_serial(): command timed out after %d seconds without output >>>\r\n\r\n" % timeout 458 break 459 try: 460 sread, _, _ = select.select([self.server_socket],[],[], end - now) 461 except InterruptedError: 462 continue 463 if sread: 464 answer = self.server_socket.recv(1024) 465 if answer: 466 data += answer.decode('utf-8') 467 # Search the prompt to stop 468 if re.search(r"[a-zA-Z0-9]+@[a-zA-Z0-9\-]+:~#", data): 469 break 470 else: 471 raise Exception("No data on serial console socket") 472 473 if data: 474 if raw: 475 status = 1 476 else: 477 # Remove first line (command line) and last line (prompt) 478 data = data[data.find('$?\r\n')+4:data.rfind('\r\n')] 479 index = data.rfind('\r\n') 480 if index == -1: 481 status_cmd = data 482 data = "" 483 else: 484 status_cmd = data[index+2:] 485 data = data[:index] 486 if (status_cmd == "0"): 487 status = 1 488 return (status, str(data)) 489 490 491 def _dump_host(self): 492 self.host_dumper.create_dir("qemu") 493 self.logger.warning("Qemu ended unexpectedly, dump data from host" 494 " is in %s" % self.host_dumper.dump_dir) 495 self.host_dumper.dump_host() 496 497# This class is for reading data from a socket and passing it to logfunc 498# to be processed. It's completely event driven and has a straightforward 499# event loop. The mechanism for stopping the thread is a simple pipe which 500# will wake up the poll and allow for tearing everything down. 501class LoggingThread(threading.Thread): 502 def __init__(self, logfunc, sock, logger): 503 self.connection_established = threading.Event() 504 self.serversock = sock 505 self.logfunc = logfunc 506 self.logger = logger 507 self.readsock = None 508 self.running = False 509 510 self.errorevents = select.POLLERR | select.POLLHUP | select.POLLNVAL 511 self.readevents = select.POLLIN | select.POLLPRI 512 513 threading.Thread.__init__(self, target=self.threadtarget) 514 515 def threadtarget(self): 516 try: 517 self.eventloop() 518 finally: 519 self.teardown() 520 521 def run(self): 522 self.logger.debug("Starting logging thread") 523 self.readpipe, self.writepipe = os.pipe() 524 threading.Thread.run(self) 525 526 def stop(self): 527 self.logger.debug("Stopping logging thread") 528 if self.running: 529 os.write(self.writepipe, bytes("stop", "utf-8")) 530 531 def teardown(self): 532 self.logger.debug("Tearing down logging thread") 533 self.close_socket(self.serversock) 534 535 if self.readsock is not None: 536 self.close_socket(self.readsock) 537 538 self.close_ignore_error(self.readpipe) 539 self.close_ignore_error(self.writepipe) 540 self.running = False 541 542 def eventloop(self): 543 poll = select.poll() 544 event_read_mask = self.errorevents | self.readevents 545 poll.register(self.serversock.fileno()) 546 poll.register(self.readpipe, event_read_mask) 547 548 breakout = False 549 self.running = True 550 self.logger.debug("Starting thread event loop") 551 while not breakout: 552 events = poll.poll() 553 for event in events: 554 # An error occurred, bail out 555 if event[1] & self.errorevents: 556 raise Exception(self.stringify_event(event[1])) 557 558 # Event to stop the thread 559 if self.readpipe == event[0]: 560 self.logger.debug("Stop event received") 561 breakout = True 562 break 563 564 # A connection request was received 565 elif self.serversock.fileno() == event[0]: 566 self.logger.debug("Connection request received") 567 self.readsock, _ = self.serversock.accept() 568 self.readsock.setblocking(0) 569 poll.unregister(self.serversock.fileno()) 570 poll.register(self.readsock.fileno(), event_read_mask) 571 572 self.logger.debug("Setting connection established event") 573 self.connection_established.set() 574 575 # Actual data to be logged 576 elif self.readsock.fileno() == event[0]: 577 data = self.recv(1024) 578 self.logfunc(data) 579 580 # Since the socket is non-blocking make sure to honor EAGAIN 581 # and EWOULDBLOCK. 582 def recv(self, count): 583 try: 584 data = self.readsock.recv(count) 585 except socket.error as e: 586 if e.errno == errno.EAGAIN or e.errno == errno.EWOULDBLOCK: 587 return '' 588 else: 589 raise 590 591 if data is None: 592 raise Exception("No data on read ready socket") 593 elif not data: 594 # This actually means an orderly shutdown 595 # happened. But for this code it counts as an 596 # error since the connection shouldn't go away 597 # until qemu exits. 598 raise Exception("Console connection closed unexpectedly") 599 600 return data 601 602 def stringify_event(self, event): 603 val = '' 604 if select.POLLERR == event: 605 val = 'POLLER' 606 elif select.POLLHUP == event: 607 val = 'POLLHUP' 608 elif select.POLLNVAL == event: 609 val = 'POLLNVAL' 610 return val 611 612 def close_socket(self, sock): 613 sock.shutdown(socket.SHUT_RDWR) 614 sock.close() 615 616 def close_ignore_error(self, fd): 617 try: 618 os.close(fd) 619 except OSError: 620 pass 621