1#
2# Copyright (C) 2013 Intel Corporation
3#
4# SPDX-License-Identifier: MIT
5#
6
7# This module provides a class for starting qemu images using runqemu.
8# It's used by testimage.bbclass.
9
10import subprocess
11import os
12import sys
13import time
14import signal
15import re
16import socket
17import select
18import errno
19import string
20import threading
21import codecs
22import logging
23from oeqa.utils.dump import HostDumper
24
25# Get Unicode non printable control chars
26control_range = list(range(0,32))+list(range(127,160))
27control_chars = [chr(x) for x in control_range
28                if chr(x) not in string.printable]
29re_control_char = re.compile('[%s]' % re.escape("".join(control_chars)))
30
31class QemuRunner:
32
33    def __init__(self, machine, rootfs, display, tmpdir, deploy_dir_image, logfile, boottime, dump_dir, dump_host_cmds,
34                 use_kvm, logger, use_slirp=False):
35
36        # Popen object for runqemu
37        self.runqemu = None
38        # pid of the qemu process that runqemu will start
39        self.qemupid = None
40        # target ip - from the command line or runqemu output
41        self.ip = None
42        # host ip - where qemu is running
43        self.server_ip = None
44        # target ip netmask
45        self.netmask = None
46
47        self.machine = machine
48        self.rootfs = rootfs
49        self.display = display
50        self.tmpdir = tmpdir
51        self.deploy_dir_image = deploy_dir_image
52        self.logfile = logfile
53        self.boottime = boottime
54        self.logged = False
55        self.thread = None
56        self.use_kvm = use_kvm
57        self.use_slirp = use_slirp
58        self.msg = ''
59
60        self.runqemutime = 120
61        self.qemu_pidfile = 'pidfile_'+str(os.getpid())
62        self.host_dumper = HostDumper(dump_host_cmds, dump_dir)
63        self.monitorpipe = None
64
65        self.logger = logger
66
67    def create_socket(self):
68        try:
69            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
70            sock.setblocking(0)
71            sock.bind(("127.0.0.1",0))
72            sock.listen(2)
73            port = sock.getsockname()[1]
74            self.logger.debug("Created listening socket for qemu serial console on: 127.0.0.1:%s" % port)
75            return (sock, port)
76
77        except socket.error:
78            sock.close()
79            raise
80
81    def log(self, msg):
82        if self.logfile:
83            # It is needed to sanitize the data received from qemu
84            # because is possible to have control characters
85            msg = msg.decode("utf-8", errors='ignore')
86            msg = re_control_char.sub('', msg)
87            self.msg += msg
88            with codecs.open(self.logfile, "a", encoding="utf-8") as f:
89                f.write("%s" % msg)
90
91    def getOutput(self, o):
92        import fcntl
93        fl = fcntl.fcntl(o, fcntl.F_GETFL)
94        fcntl.fcntl(o, fcntl.F_SETFL, fl | os.O_NONBLOCK)
95        return os.read(o.fileno(), 1000000).decode("utf-8")
96
97
98    def handleSIGCHLD(self, signum, frame):
99        if self.runqemu and self.runqemu.poll():
100            if self.runqemu.returncode:
101                self.logger.debug('runqemu exited with code %d' % self.runqemu.returncode)
102                self.logger.debug("Output from runqemu:\n%s" % self.getOutput(self.runqemu.stdout))
103                self.stop()
104                self._dump_host()
105                raise SystemExit
106
107    def start(self, qemuparams = None, get_ip = True, extra_bootparams = None, runqemuparams='', launch_cmd=None, discard_writes=True):
108        env = os.environ.copy()
109        if self.display:
110            env["DISPLAY"] = self.display
111            # Set this flag so that Qemu doesn't do any grabs as SDL grabs
112            # interact badly with screensavers.
113            env["QEMU_DONT_GRAB"] = "1"
114        if not os.path.exists(self.rootfs):
115            self.logger.error("Invalid rootfs %s" % self.rootfs)
116            return False
117        if not os.path.exists(self.tmpdir):
118            self.logger.error("Invalid TMPDIR path %s" % self.tmpdir)
119            return False
120        else:
121            env["OE_TMPDIR"] = self.tmpdir
122        if not os.path.exists(self.deploy_dir_image):
123            self.logger.error("Invalid DEPLOY_DIR_IMAGE path %s" % self.deploy_dir_image)
124            return False
125        else:
126            env["DEPLOY_DIR_IMAGE"] = self.deploy_dir_image
127
128        if not launch_cmd:
129            launch_cmd = 'runqemu %s %s ' % ('snapshot' if discard_writes else '', runqemuparams)
130            if self.use_kvm:
131                self.logger.debug('Using kvm for runqemu')
132                launch_cmd += ' kvm'
133            else:
134                self.logger.debug('Not using kvm for runqemu')
135            if not self.display:
136                launch_cmd += ' nographic'
137            if self.use_slirp:
138                launch_cmd += ' slirp'
139            launch_cmd += ' %s %s' % (self.machine, self.rootfs)
140
141        return self.launch(launch_cmd, qemuparams=qemuparams, get_ip=get_ip, extra_bootparams=extra_bootparams, env=env)
142
143    def launch(self, launch_cmd, get_ip = True, qemuparams = None, extra_bootparams = None, env = None):
144        try:
145            self.threadsock, threadport = self.create_socket()
146            self.server_socket, self.serverport = self.create_socket()
147        except socket.error as msg:
148            self.logger.error("Failed to create listening socket: %s" % msg[1])
149            return False
150
151        bootparams = 'console=tty1 console=ttyS0,115200n8 printk.time=1'
152        if extra_bootparams:
153            bootparams = bootparams + ' ' + extra_bootparams
154
155        # Ask QEMU to store the QEMU process PID in file, this way we don't have to parse running processes
156        # and analyze descendents in order to determine it.
157        if os.path.exists(self.qemu_pidfile):
158            os.remove(self.qemu_pidfile)
159        self.qemuparams = 'bootparams="{0}" qemuparams="-pidfile {1}"'.format(bootparams, self.qemu_pidfile)
160        if qemuparams:
161            self.qemuparams = self.qemuparams[:-1] + " " + qemuparams + " " + '\"'
162
163        launch_cmd += ' tcpserial=%s:%s %s' % (threadport, self.serverport, self.qemuparams)
164
165        self.origchldhandler = signal.getsignal(signal.SIGCHLD)
166        signal.signal(signal.SIGCHLD, self.handleSIGCHLD)
167
168        self.logger.debug('launchcmd=%s'%(launch_cmd))
169
170        # FIXME: We pass in stdin=subprocess.PIPE here to work around stty
171        # blocking at the end of the runqemu script when using this within
172        # oe-selftest (this makes stty error out immediately). There ought
173        # to be a proper fix but this will suffice for now.
174        self.runqemu = subprocess.Popen(launch_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE, preexec_fn=os.setpgrp, env=env)
175        output = self.runqemu.stdout
176
177        #
178        # We need the preexec_fn above so that all runqemu processes can easily be killed
179        # (by killing their process group). This presents a problem if this controlling
180        # process itself is killed however since those processes don't notice the death
181        # of the parent and merrily continue on.
182        #
183        # Rather than hack runqemu to deal with this, we add something here instead.
184        # Basically we fork off another process which holds an open pipe to the parent
185        # and also is setpgrp. If/when the pipe sees EOF from the parent dieing, it kills
186        # the process group. This is like pctrl's PDEATHSIG but for a process group
187        # rather than a single process.
188        #
189        r, w = os.pipe()
190        self.monitorpid = os.fork()
191        if self.monitorpid:
192            os.close(r)
193            self.monitorpipe = os.fdopen(w, "w")
194        else:
195            # child process
196            os.setpgrp()
197            os.close(w)
198            r = os.fdopen(r)
199            x = r.read()
200            os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM)
201            sys.exit(0)
202
203        self.logger.debug("runqemu started, pid is %s" % self.runqemu.pid)
204        self.logger.debug("waiting at most %s seconds for qemu pid (%s)" %
205                          (self.runqemutime, time.strftime("%D %H:%M:%S")))
206        endtime = time.time() + self.runqemutime
207        while not self.is_alive() and time.time() < endtime:
208            if self.runqemu.poll():
209                if self.runqemu.returncode:
210                    # No point waiting any longer
211                    self.logger.debug('runqemu exited with code %d' % self.runqemu.returncode)
212                    self._dump_host()
213                    self.logger.debug("Output from runqemu:\n%s" % self.getOutput(output))
214                    self.stop()
215                    return False
216            time.sleep(0.5)
217
218        if not self.is_alive():
219            self.logger.error("Qemu pid didn't appear in %s seconds (%s)" %
220                              (self.runqemutime, time.strftime("%D %H:%M:%S")))
221            # Dump all processes to help us to figure out what is going on...
222            ps = subprocess.Popen(['ps', 'axww', '-o', 'pid,ppid,command '], stdout=subprocess.PIPE).communicate()[0]
223            processes = ps.decode("utf-8")
224            self.logger.debug("Running processes:\n%s" % processes)
225            self._dump_host()
226            op = self.getOutput(output)
227            self.stop()
228            if op:
229                self.logger.error("Output from runqemu:\n%s" % op)
230            else:
231                self.logger.error("No output from runqemu.\n")
232            return False
233
234        # We are alive: qemu is running
235        out = self.getOutput(output)
236        netconf = False # network configuration is not required by default
237        self.logger.debug("qemu started in %s seconds - qemu procces pid is %s (%s)" %
238                          (time.time() - (endtime - self.runqemutime),
239                           self.qemupid, time.strftime("%D %H:%M:%S")))
240        if get_ip:
241            cmdline = ''
242            with open('/proc/%s/cmdline' % self.qemupid) as p:
243                cmdline = p.read()
244                # It is needed to sanitize the data received
245                # because is possible to have control characters
246                cmdline = re_control_char.sub(' ', cmdline)
247            try:
248                if self.use_slirp:
249                    tcp_ports = cmdline.split("hostfwd=tcp::")[1]
250                    host_port = tcp_ports[:tcp_ports.find('-')]
251                    self.ip = "localhost:%s" % host_port
252                else:
253                    ips = re.findall(r"((?:[0-9]{1,3}\.){3}[0-9]{1,3})", cmdline.split("ip=")[1])
254                    self.ip = ips[0]
255                    self.server_ip = ips[1]
256                self.logger.debug("qemu cmdline used:\n{}".format(cmdline))
257            except (IndexError, ValueError):
258                # Try to get network configuration from runqemu output
259                match = re.match(r'.*Network configuration: ([0-9.]+)::([0-9.]+):([0-9.]+)$.*',
260                                 out, re.MULTILINE|re.DOTALL)
261                if match:
262                    self.ip, self.server_ip, self.netmask = match.groups()
263                    # network configuration is required as we couldn't get it
264                    # from the runqemu command line, so qemu doesn't run kernel
265                    # and guest networking is not configured
266                    netconf = True
267                else:
268                    self.logger.error("Couldn't get ip from qemu command line and runqemu output! "
269                                 "Here is the qemu command line used:\n%s\n"
270                                 "and output from runqemu:\n%s" % (cmdline, out))
271                    self._dump_host()
272                    self.stop()
273                    return False
274
275        self.logger.debug("Target IP: %s" % self.ip)
276        self.logger.debug("Server IP: %s" % self.server_ip)
277
278        self.thread = LoggingThread(self.log, self.threadsock, self.logger)
279        self.thread.start()
280        if not self.thread.connection_established.wait(self.boottime):
281            self.logger.error("Didn't receive a console connection from qemu. "
282                         "Here is the qemu command line used:\n%s\nand "
283                         "output from runqemu:\n%s" % (cmdline, out))
284            self.stop_thread()
285            return False
286
287        self.logger.debug("Output from runqemu:\n%s", out)
288        self.logger.debug("Waiting at most %d seconds for login banner (%s)" %
289                          (self.boottime, time.strftime("%D %H:%M:%S")))
290        endtime = time.time() + self.boottime
291        socklist = [self.server_socket]
292        reachedlogin = False
293        stopread = False
294        qemusock = None
295        bootlog = b''
296        data = b''
297        while time.time() < endtime and not stopread:
298            try:
299                sread, swrite, serror = select.select(socklist, [], [], 5)
300            except InterruptedError:
301                continue
302            for sock in sread:
303                if sock is self.server_socket:
304                    qemusock, addr = self.server_socket.accept()
305                    qemusock.setblocking(0)
306                    socklist.append(qemusock)
307                    socklist.remove(self.server_socket)
308                    self.logger.debug("Connection from %s:%s" % addr)
309                else:
310                    data = data + sock.recv(1024)
311                    if data:
312                        bootlog += data
313                        data = b''
314                        if b' login:' in bootlog:
315                            self.server_socket = qemusock
316                            stopread = True
317                            reachedlogin = True
318                            self.logger.debug("Reached login banner in %s seconds (%s)" %
319                                              (time.time() - (endtime - self.boottime),
320                                              time.strftime("%D %H:%M:%S")))
321                    else:
322                        # no need to check if reachedlogin unless we support multiple connections
323                        self.logger.debug("QEMU socket disconnected before login banner reached. (%s)" %
324                                          time.strftime("%D %H:%M:%S"))
325                        socklist.remove(sock)
326                        sock.close()
327                        stopread = True
328
329
330        if not reachedlogin:
331            if time.time() >= endtime:
332                self.logger.debug("Target didn't reach login banner in %d seconds (%s)" %
333                                  (self.boottime, time.strftime("%D %H:%M:%S")))
334            tail = lambda l: "\n".join(l.splitlines()[-25:])
335            bootlog = bootlog.decode("utf-8")
336            # in case bootlog is empty, use tail qemu log store at self.msg
337            lines = tail(bootlog if bootlog else self.msg)
338            self.logger.debug("Last 25 lines of text:\n%s" % lines)
339            self.logger.debug("Check full boot log: %s" % self.logfile)
340            self._dump_host()
341            self.stop()
342            return False
343
344        # If we are not able to login the tests can continue
345        try:
346            (status, output) = self.run_serial("root\n", raw=True)
347            if re.search(r"root@[a-zA-Z0-9\-]+:~#", output):
348                self.logged = True
349                self.logger.debug("Logged as root in serial console")
350                if netconf:
351                    # configure guest networking
352                    cmd = "ifconfig eth0 %s netmask %s up\n" % (self.ip, self.netmask)
353                    output = self.run_serial(cmd, raw=True)[1]
354                    if re.search(r"root@[a-zA-Z0-9\-]+:~#", output):
355                        self.logger.debug("configured ip address %s", self.ip)
356                    else:
357                        self.logger.debug("Couldn't configure guest networking")
358            else:
359                self.logger.debug("Couldn't login into serial console"
360                            " as root using blank password")
361                self.logger.debug("The output:\n%s" % output)
362        except:
363            self.logger.debug("Serial console failed while trying to login")
364        return True
365
366    def stop(self):
367        if hasattr(self, "origchldhandler"):
368            signal.signal(signal.SIGCHLD, self.origchldhandler)
369        self.stop_thread()
370        self.stop_qemu_system()
371        if self.runqemu:
372            if hasattr(self, "monitorpid"):
373                os.kill(self.monitorpid, signal.SIGKILL)
374                self.logger.debug("Sending SIGTERM to runqemu")
375                try:
376                    os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM)
377                except OSError as e:
378                    if e.errno != errno.ESRCH:
379                        raise
380            endtime = time.time() + self.runqemutime
381            while self.runqemu.poll() is None and time.time() < endtime:
382                time.sleep(1)
383            if self.runqemu.poll() is None:
384                self.logger.debug("Sending SIGKILL to runqemu")
385                os.killpg(os.getpgid(self.runqemu.pid), signal.SIGKILL)
386            self.runqemu.stdin.close()
387            self.runqemu.stdout.close()
388            self.runqemu = None
389
390        if hasattr(self, 'server_socket') and self.server_socket:
391            self.server_socket.close()
392            self.server_socket = None
393        if hasattr(self, 'threadsock') and self.threadsock:
394            self.threadsock.close()
395            self.threadsock = None
396        self.qemupid = None
397        self.ip = None
398        if os.path.exists(self.qemu_pidfile):
399            os.remove(self.qemu_pidfile)
400        if self.monitorpipe:
401            self.monitorpipe.close()
402
403    def stop_qemu_system(self):
404        if self.qemupid:
405            try:
406                # qemu-system behaves well and a SIGTERM is enough
407                os.kill(self.qemupid, signal.SIGTERM)
408            except ProcessLookupError as e:
409                self.logger.warning('qemu-system ended unexpectedly')
410
411    def stop_thread(self):
412        if self.thread and self.thread.is_alive():
413            self.thread.stop()
414            self.thread.join()
415
416    def restart(self, qemuparams = None):
417        self.logger.debug("Restarting qemu process")
418        if self.runqemu.poll() is None:
419            self.stop()
420        if self.start(qemuparams):
421            return True
422        return False
423
424    def is_alive(self):
425        if not self.runqemu or self.runqemu.poll() is not None:
426            return False
427        if os.path.isfile(self.qemu_pidfile):
428            f = open(self.qemu_pidfile, 'r')
429            qemu_pid = f.read()
430            f.close()
431            qemupid = int(qemu_pid)
432            if os.path.exists("/proc/" + str(qemupid)):
433                self.qemupid = qemupid
434                return True
435        return False
436
437    def run_serial(self, command, raw=False, timeout=60):
438        # We assume target system have echo to get command status
439        if not raw:
440            command = "%s; echo $?\n" % command
441
442        data = ''
443        status = 0
444        self.server_socket.sendall(command.encode('utf-8'))
445        start = time.time()
446        end = start + timeout
447        while True:
448            now = time.time()
449            if now >= end:
450                data += "<<< run_serial(): command timed out after %d seconds without output >>>\r\n\r\n" % timeout
451                break
452            try:
453                sread, _, _ = select.select([self.server_socket],[],[], end - now)
454            except InterruptedError:
455                continue
456            if sread:
457                answer = self.server_socket.recv(1024)
458                if answer:
459                    data += answer.decode('utf-8')
460                    # Search the prompt to stop
461                    if re.search(r"[a-zA-Z0-9]+@[a-zA-Z0-9\-]+:~#", data):
462                        break
463                else:
464                    raise Exception("No data on serial console socket")
465
466        if data:
467            if raw:
468                status = 1
469            else:
470                # Remove first line (command line) and last line (prompt)
471                data = data[data.find('$?\r\n')+4:data.rfind('\r\n')]
472                index = data.rfind('\r\n')
473                if index == -1:
474                    status_cmd = data
475                    data = ""
476                else:
477                    status_cmd = data[index+2:]
478                    data = data[:index]
479                if (status_cmd == "0"):
480                    status = 1
481        return (status, str(data))
482
483
484    def _dump_host(self):
485        self.host_dumper.create_dir("qemu")
486        self.logger.warning("Qemu ended unexpectedly, dump data from host"
487                " is in %s" % self.host_dumper.dump_dir)
488        self.host_dumper.dump_host()
489
490# This class is for reading data from a socket and passing it to logfunc
491# to be processed. It's completely event driven and has a straightforward
492# event loop. The mechanism for stopping the thread is a simple pipe which
493# will wake up the poll and allow for tearing everything down.
494class LoggingThread(threading.Thread):
495    def __init__(self, logfunc, sock, logger):
496        self.connection_established = threading.Event()
497        self.serversock = sock
498        self.logfunc = logfunc
499        self.logger = logger
500        self.readsock = None
501        self.running = False
502
503        self.errorevents = select.POLLERR | select.POLLHUP | select.POLLNVAL
504        self.readevents = select.POLLIN | select.POLLPRI
505
506        threading.Thread.__init__(self, target=self.threadtarget)
507
508    def threadtarget(self):
509        try:
510            self.eventloop()
511        finally:
512            self.teardown()
513
514    def run(self):
515        self.logger.debug("Starting logging thread")
516        self.readpipe, self.writepipe = os.pipe()
517        threading.Thread.run(self)
518
519    def stop(self):
520        self.logger.debug("Stopping logging thread")
521        if self.running:
522            os.write(self.writepipe, bytes("stop", "utf-8"))
523
524    def teardown(self):
525        self.logger.debug("Tearing down logging thread")
526        self.close_socket(self.serversock)
527
528        if self.readsock is not None:
529            self.close_socket(self.readsock)
530
531        self.close_ignore_error(self.readpipe)
532        self.close_ignore_error(self.writepipe)
533        self.running = False
534
535    def eventloop(self):
536        poll = select.poll()
537        event_read_mask = self.errorevents | self.readevents
538        poll.register(self.serversock.fileno())
539        poll.register(self.readpipe, event_read_mask)
540
541        breakout = False
542        self.running = True
543        self.logger.debug("Starting thread event loop")
544        while not breakout:
545            events = poll.poll()
546            for event in events:
547                # An error occurred, bail out
548                if event[1] & self.errorevents:
549                    raise Exception(self.stringify_event(event[1]))
550
551                # Event to stop the thread
552                if self.readpipe == event[0]:
553                    self.logger.debug("Stop event received")
554                    breakout = True
555                    break
556
557                # A connection request was received
558                elif self.serversock.fileno() == event[0]:
559                    self.logger.debug("Connection request received")
560                    self.readsock, _ = self.serversock.accept()
561                    self.readsock.setblocking(0)
562                    poll.unregister(self.serversock.fileno())
563                    poll.register(self.readsock.fileno(), event_read_mask)
564
565                    self.logger.debug("Setting connection established event")
566                    self.connection_established.set()
567
568                # Actual data to be logged
569                elif self.readsock.fileno() == event[0]:
570                    data = self.recv(1024)
571                    self.logfunc(data)
572
573    # Since the socket is non-blocking make sure to honor EAGAIN
574    # and EWOULDBLOCK.
575    def recv(self, count):
576        try:
577            data = self.readsock.recv(count)
578        except socket.error as e:
579            if e.errno == errno.EAGAIN or e.errno == errno.EWOULDBLOCK:
580                return ''
581            else:
582                raise
583
584        if data is None:
585            raise Exception("No data on read ready socket")
586        elif not data:
587            # This actually means an orderly shutdown
588            # happened. But for this code it counts as an
589            # error since the connection shouldn't go away
590            # until qemu exits.
591            raise Exception("Console connection closed unexpectedly")
592
593        return data
594
595    def stringify_event(self, event):
596        val = ''
597        if select.POLLERR == event:
598            val = 'POLLER'
599        elif select.POLLHUP == event:
600            val = 'POLLHUP'
601        elif select.POLLNVAL == event:
602            val = 'POLLNVAL'
603        return val
604
605    def close_socket(self, sock):
606        sock.shutdown(socket.SHUT_RDWR)
607        sock.close()
608
609    def close_ignore_error(self, fd):
610        try:
611            os.close(fd)
612        except OSError:
613            pass
614