1#
2# Copyright (C) 2013 Intel Corporation
3#
4# SPDX-License-Identifier: MIT
5#
6
7# This module provides a class for starting qemu images using runqemu.
8# It's used by testimage.bbclass.
9
10import subprocess
11import os
12import sys
13import time
14import signal
15import re
16import socket
17import select
18import errno
19import string
20import threading
21import codecs
22import logging
23from oeqa.utils.dump import HostDumper
24
25# Get Unicode non printable control chars
26control_range = list(range(0,32))+list(range(127,160))
27control_chars = [chr(x) for x in control_range
28                if chr(x) not in string.printable]
29re_control_char = re.compile('[%s]' % re.escape("".join(control_chars)))
30
31class QemuRunner:
32
33    def __init__(self, machine, rootfs, display, tmpdir, deploy_dir_image, logfile, boottime, dump_dir, dump_host_cmds,
34                 use_kvm, logger, use_slirp=False):
35
36        # Popen object for runqemu
37        self.runqemu = None
38        # pid of the qemu process that runqemu will start
39        self.qemupid = None
40        # target ip - from the command line or runqemu output
41        self.ip = None
42        # host ip - where qemu is running
43        self.server_ip = None
44        # target ip netmask
45        self.netmask = None
46
47        self.machine = machine
48        self.rootfs = rootfs
49        self.display = display
50        self.tmpdir = tmpdir
51        self.deploy_dir_image = deploy_dir_image
52        self.logfile = logfile
53        self.boottime = boottime
54        self.logged = False
55        self.thread = None
56        self.use_kvm = use_kvm
57        self.use_slirp = use_slirp
58        self.msg = ''
59
60        self.runqemutime = 120
61        self.qemu_pidfile = 'pidfile_'+str(os.getpid())
62        self.host_dumper = HostDumper(dump_host_cmds, dump_dir)
63        self.monitorpipe = None
64
65        self.logger = logger
66
67    def create_socket(self):
68        try:
69            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
70            sock.setblocking(0)
71            sock.bind(("127.0.0.1",0))
72            sock.listen(2)
73            port = sock.getsockname()[1]
74            self.logger.debug("Created listening socket for qemu serial console on: 127.0.0.1:%s" % port)
75            return (sock, port)
76
77        except socket.error:
78            sock.close()
79            raise
80
81    def log(self, msg):
82        if self.logfile:
83            # It is needed to sanitize the data received from qemu
84            # because is possible to have control characters
85            msg = msg.decode("utf-8", errors='ignore')
86            msg = re_control_char.sub('', msg)
87            self.msg += msg
88            with codecs.open(self.logfile, "a", encoding="utf-8") as f:
89                f.write("%s" % msg)
90
91    def getOutput(self, o):
92        import fcntl
93        fl = fcntl.fcntl(o, fcntl.F_GETFL)
94        fcntl.fcntl(o, fcntl.F_SETFL, fl | os.O_NONBLOCK)
95        return os.read(o.fileno(), 1000000).decode("utf-8")
96
97
98    def handleSIGCHLD(self, signum, frame):
99        if self.runqemu and self.runqemu.poll():
100            if self.runqemu.returncode:
101                self.logger.warning('runqemu exited with code %d' % self.runqemu.returncode)
102                self.logger.debug("Output from runqemu:\n%s" % self.getOutput(self.runqemu.stdout))
103                self.stop()
104                self._dump_host()
105                raise SystemExit
106
107    def start(self, qemuparams = None, get_ip = True, extra_bootparams = None, runqemuparams='', launch_cmd=None, discard_writes=True):
108        env = os.environ.copy()
109        if self.display:
110            env["DISPLAY"] = self.display
111            # Set this flag so that Qemu doesn't do any grabs as SDL grabs
112            # interact badly with screensavers.
113            env["QEMU_DONT_GRAB"] = "1"
114        if not os.path.exists(self.rootfs):
115            self.logger.error("Invalid rootfs %s" % self.rootfs)
116            return False
117        if not os.path.exists(self.tmpdir):
118            self.logger.error("Invalid TMPDIR path %s" % self.tmpdir)
119            return False
120        else:
121            env["OE_TMPDIR"] = self.tmpdir
122        if not os.path.exists(self.deploy_dir_image):
123            self.logger.error("Invalid DEPLOY_DIR_IMAGE path %s" % self.deploy_dir_image)
124            return False
125        else:
126            env["DEPLOY_DIR_IMAGE"] = self.deploy_dir_image
127
128        if not launch_cmd:
129            launch_cmd = 'runqemu %s' % ('snapshot' if discard_writes else '')
130            if self.use_kvm:
131                self.logger.debug('Using kvm for runqemu')
132                launch_cmd += ' kvm'
133            else:
134                self.logger.debug('Not using kvm for runqemu')
135            if not self.display:
136                launch_cmd += ' nographic'
137            if self.use_slirp:
138                launch_cmd += ' slirp'
139            launch_cmd += ' %s %s %s' % (runqemuparams, self.machine, self.rootfs)
140
141        return self.launch(launch_cmd, qemuparams=qemuparams, get_ip=get_ip, extra_bootparams=extra_bootparams, env=env)
142
143    def launch(self, launch_cmd, get_ip = True, qemuparams = None, extra_bootparams = None, env = None):
144        try:
145            self.threadsock, threadport = self.create_socket()
146            self.server_socket, self.serverport = self.create_socket()
147        except socket.error as msg:
148            self.logger.error("Failed to create listening socket: %s" % msg[1])
149            return False
150
151        bootparams = 'console=tty1 console=ttyS0,115200n8 printk.time=1'
152        if extra_bootparams:
153            bootparams = bootparams + ' ' + extra_bootparams
154
155        # Ask QEMU to store the QEMU process PID in file, this way we don't have to parse running processes
156        # and analyze descendents in order to determine it.
157        if os.path.exists(self.qemu_pidfile):
158            os.remove(self.qemu_pidfile)
159        self.qemuparams = 'bootparams="{0}" qemuparams="-pidfile {1}"'.format(bootparams, self.qemu_pidfile)
160        if qemuparams:
161            self.qemuparams = self.qemuparams[:-1] + " " + qemuparams + " " + '\"'
162
163        launch_cmd += ' tcpserial=%s:%s %s' % (threadport, self.serverport, self.qemuparams)
164
165        self.origchldhandler = signal.getsignal(signal.SIGCHLD)
166        signal.signal(signal.SIGCHLD, self.handleSIGCHLD)
167
168        self.logger.debug('launchcmd=%s'%(launch_cmd))
169
170        # FIXME: We pass in stdin=subprocess.PIPE here to work around stty
171        # blocking at the end of the runqemu script when using this within
172        # oe-selftest (this makes stty error out immediately). There ought
173        # to be a proper fix but this will suffice for now.
174        self.runqemu = subprocess.Popen(launch_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE, preexec_fn=os.setpgrp, env=env)
175        output = self.runqemu.stdout
176
177        #
178        # We need the preexec_fn above so that all runqemu processes can easily be killed
179        # (by killing their process group). This presents a problem if this controlling
180        # process itself is killed however since those processes don't notice the death
181        # of the parent and merrily continue on.
182        #
183        # Rather than hack runqemu to deal with this, we add something here instead.
184        # Basically we fork off another process which holds an open pipe to the parent
185        # and also is setpgrp. If/when the pipe sees EOF from the parent dieing, it kills
186        # the process group. This is like pctrl's PDEATHSIG but for a process group
187        # rather than a single process.
188        #
189        r, w = os.pipe()
190        self.monitorpid = os.fork()
191        if self.monitorpid:
192            os.close(r)
193            self.monitorpipe = os.fdopen(w, "w")
194        else:
195            # child process
196            os.setpgrp()
197            os.close(w)
198            r = os.fdopen(r)
199            x = r.read()
200            os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM)
201            sys.exit(0)
202
203        self.logger.debug("runqemu started, pid is %s" % self.runqemu.pid)
204        self.logger.debug("waiting at most %s seconds for qemu pid (%s)" %
205                          (self.runqemutime, time.strftime("%D %H:%M:%S")))
206        endtime = time.time() + self.runqemutime
207        while not self.is_alive() and time.time() < endtime:
208            if self.runqemu.poll():
209                if self.runqemu.returncode:
210                    # No point waiting any longer
211                    self.logger.warning('runqemu exited with code %d' % self.runqemu.returncode)
212                    self._dump_host()
213                    self.logger.warning("Output from runqemu:\n%s" % self.getOutput(output))
214                    self.stop()
215                    return False
216            time.sleep(0.5)
217
218        if not self.is_alive():
219            self.logger.error("Qemu pid didn't appear in %s seconds (%s)" %
220                              (self.runqemutime, time.strftime("%D %H:%M:%S")))
221            # Dump all processes to help us to figure out what is going on...
222            ps = subprocess.Popen(['ps', 'axww', '-o', 'pid,ppid,command '], stdout=subprocess.PIPE).communicate()[0]
223            processes = ps.decode("utf-8")
224            self.logger.debug("Running processes:\n%s" % processes)
225            self._dump_host()
226            op = self.getOutput(output)
227            self.stop()
228            if op:
229                self.logger.error("Output from runqemu:\n%s" % op)
230            else:
231                self.logger.error("No output from runqemu.\n")
232            return False
233
234        # We are alive: qemu is running
235        out = self.getOutput(output)
236        netconf = False # network configuration is not required by default
237        self.logger.debug("qemu started in %s seconds - qemu procces pid is %s (%s)" %
238                          (time.time() - (endtime - self.runqemutime),
239                           self.qemupid, time.strftime("%D %H:%M:%S")))
240        if get_ip:
241            cmdline = ''
242            with open('/proc/%s/cmdline' % self.qemupid) as p:
243                cmdline = p.read()
244                # It is needed to sanitize the data received
245                # because is possible to have control characters
246                cmdline = re_control_char.sub(' ', cmdline)
247            try:
248                if self.use_slirp:
249                    tcp_ports = cmdline.split("hostfwd=tcp::")[1]
250                    host_port = tcp_ports[:tcp_ports.find('-')]
251                    self.ip = "localhost:%s" % host_port
252                else:
253                    ips = re.findall(r"((?:[0-9]{1,3}\.){3}[0-9]{1,3})", cmdline.split("ip=")[1])
254                    self.ip = ips[0]
255                    self.server_ip = ips[1]
256                self.logger.debug("qemu cmdline used:\n{}".format(cmdline))
257            except (IndexError, ValueError):
258                # Try to get network configuration from runqemu output
259                match = re.match(r'.*Network configuration: ([0-9.]+)::([0-9.]+):([0-9.]+)$.*',
260                                 out, re.MULTILINE|re.DOTALL)
261                if match:
262                    self.ip, self.server_ip, self.netmask = match.groups()
263                    # network configuration is required as we couldn't get it
264                    # from the runqemu command line, so qemu doesn't run kernel
265                    # and guest networking is not configured
266                    netconf = True
267                else:
268                    self.logger.error("Couldn't get ip from qemu command line and runqemu output! "
269                                 "Here is the qemu command line used:\n%s\n"
270                                 "and output from runqemu:\n%s" % (cmdline, out))
271                    self._dump_host()
272                    self.stop()
273                    return False
274
275        self.logger.debug("Target IP: %s" % self.ip)
276        self.logger.debug("Server IP: %s" % self.server_ip)
277
278        self.thread = LoggingThread(self.log, self.threadsock, self.logger)
279        self.thread.start()
280        if not self.thread.connection_established.wait(self.boottime):
281            self.logger.error("Didn't receive a console connection from qemu. "
282                         "Here is the qemu command line used:\n%s\nand "
283                         "output from runqemu:\n%s" % (cmdline, out))
284            self.stop_thread()
285            return False
286
287        self.logger.debug("Output from runqemu:\n%s", out)
288        self.logger.debug("Waiting at most %d seconds for login banner (%s)" %
289                          (self.boottime, time.strftime("%D %H:%M:%S")))
290        endtime = time.time() + self.boottime
291        socklist = [self.server_socket]
292        reachedlogin = False
293        stopread = False
294        qemusock = None
295        bootlog = b''
296        data = b''
297        while time.time() < endtime and not stopread:
298            try:
299                sread, swrite, serror = select.select(socklist, [], [], 5)
300            except InterruptedError:
301                continue
302            for sock in sread:
303                if sock is self.server_socket:
304                    qemusock, addr = self.server_socket.accept()
305                    qemusock.setblocking(0)
306                    socklist.append(qemusock)
307                    socklist.remove(self.server_socket)
308                    self.logger.debug("Connection from %s:%s" % addr)
309                else:
310                    data = data + sock.recv(1024)
311                    if data:
312                        bootlog += data
313                        data = b''
314                        if b' login:' in bootlog:
315                            self.server_socket = qemusock
316                            stopread = True
317                            reachedlogin = True
318                            self.logger.debug("Reached login banner in %s seconds (%s)" %
319                                              (time.time() - (endtime - self.boottime),
320                                              time.strftime("%D %H:%M:%S")))
321                    else:
322                        # no need to check if reachedlogin unless we support multiple connections
323                        self.logger.debug("QEMU socket disconnected before login banner reached. (%s)" %
324                                          time.strftime("%D %H:%M:%S"))
325                        socklist.remove(sock)
326                        sock.close()
327                        stopread = True
328
329
330        if not reachedlogin:
331            if time.time() >= endtime:
332                self.logger.warning("Target didn't reach login banner in %d seconds (%s)" %
333                                  (self.boottime, time.strftime("%D %H:%M:%S")))
334            tail = lambda l: "\n".join(l.splitlines()[-25:])
335            bootlog = bootlog.decode("utf-8")
336            # in case bootlog is empty, use tail qemu log store at self.msg
337            lines = tail(bootlog if bootlog else self.msg)
338            self.logger.warning("Last 25 lines of text:\n%s" % lines)
339            self.logger.warning("Check full boot log: %s" % self.logfile)
340            self._dump_host()
341            self.stop()
342            return False
343
344        # If we are not able to login the tests can continue
345        try:
346            (status, output) = self.run_serial("root\n", raw=True)
347            if re.search(r"root@[a-zA-Z0-9\-]+:~#", output):
348                self.logged = True
349                self.logger.debug("Logged as root in serial console")
350                if netconf:
351                    # configure guest networking
352                    cmd = "ifconfig eth0 %s netmask %s up\n" % (self.ip, self.netmask)
353                    output = self.run_serial(cmd, raw=True)[1]
354                    if re.search(r"root@[a-zA-Z0-9\-]+:~#", output):
355                        self.logger.debug("configured ip address %s", self.ip)
356                    else:
357                        self.logger.debug("Couldn't configure guest networking")
358            else:
359                self.logger.warning("Couldn't login into serial console"
360                            " as root using blank password")
361                self.logger.warning("The output:\n%s" % output)
362        except:
363            self.logger.warning("Serial console failed while trying to login")
364        return True
365
366    def stop(self):
367        if hasattr(self, "origchldhandler"):
368            signal.signal(signal.SIGCHLD, self.origchldhandler)
369        self.stop_thread()
370        self.stop_qemu_system()
371        if self.runqemu:
372            if hasattr(self, "monitorpid"):
373                os.kill(self.monitorpid, signal.SIGKILL)
374                self.logger.debug("Sending SIGTERM to runqemu")
375                try:
376                    os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM)
377                except OSError as e:
378                    if e.errno != errno.ESRCH:
379                        raise
380            endtime = time.time() + self.runqemutime
381            while self.runqemu.poll() is None and time.time() < endtime:
382                time.sleep(1)
383            if self.runqemu.poll() is None:
384                self.logger.debug("Sending SIGKILL to runqemu")
385                os.killpg(os.getpgid(self.runqemu.pid), signal.SIGKILL)
386            self.runqemu.stdin.close()
387            self.runqemu.stdout.close()
388            self.runqemu = None
389
390        if hasattr(self, 'server_socket') and self.server_socket:
391            self.server_socket.close()
392            self.server_socket = None
393        if hasattr(self, 'threadsock') and self.threadsock:
394            self.threadsock.close()
395            self.threadsock = None
396        self.qemupid = None
397        self.ip = None
398        if os.path.exists(self.qemu_pidfile):
399            os.remove(self.qemu_pidfile)
400        if self.monitorpipe:
401            self.monitorpipe.close()
402
403    def stop_qemu_system(self):
404        if self.qemupid:
405            try:
406                # qemu-system behaves well and a SIGTERM is enough
407                os.kill(self.qemupid, signal.SIGTERM)
408            except ProcessLookupError as e:
409                self.logger.warning('qemu-system ended unexpectedly')
410
411    def stop_thread(self):
412        if self.thread and self.thread.is_alive():
413            self.thread.stop()
414            self.thread.join()
415
416    def restart(self, qemuparams = None):
417        self.logger.warning("Restarting qemu process")
418        if self.runqemu.poll() is None:
419            self.stop()
420        if self.start(qemuparams):
421            return True
422        return False
423
424    def is_alive(self):
425        if not self.runqemu or self.runqemu.poll() is not None:
426            return False
427        if os.path.isfile(self.qemu_pidfile):
428            # when handling pidfile, qemu creates the file, stat it, lock it and then write to it
429            # so it's possible that the file has been created but the content is empty
430            pidfile_timeout = time.time() + 3
431            while time.time() < pidfile_timeout:
432                with open(self.qemu_pidfile, 'r') as f:
433                    qemu_pid = f.read().strip()
434                # file created but not yet written contents
435                if not qemu_pid:
436                    time.sleep(0.5)
437                    continue
438                else:
439                    if os.path.exists("/proc/" + qemu_pid):
440                        self.qemupid = int(qemu_pid)
441                        return True
442        return False
443
444    def run_serial(self, command, raw=False, timeout=60):
445        # We assume target system have echo to get command status
446        if not raw:
447            command = "%s; echo $?\n" % command
448
449        data = ''
450        status = 0
451        self.server_socket.sendall(command.encode('utf-8'))
452        start = time.time()
453        end = start + timeout
454        while True:
455            now = time.time()
456            if now >= end:
457                data += "<<< run_serial(): command timed out after %d seconds without output >>>\r\n\r\n" % timeout
458                break
459            try:
460                sread, _, _ = select.select([self.server_socket],[],[], end - now)
461            except InterruptedError:
462                continue
463            if sread:
464                answer = self.server_socket.recv(1024)
465                if answer:
466                    data += answer.decode('utf-8')
467                    # Search the prompt to stop
468                    if re.search(r"[a-zA-Z0-9]+@[a-zA-Z0-9\-]+:~#", data):
469                        break
470                else:
471                    raise Exception("No data on serial console socket")
472
473        if data:
474            if raw:
475                status = 1
476            else:
477                # Remove first line (command line) and last line (prompt)
478                data = data[data.find('$?\r\n')+4:data.rfind('\r\n')]
479                index = data.rfind('\r\n')
480                if index == -1:
481                    status_cmd = data
482                    data = ""
483                else:
484                    status_cmd = data[index+2:]
485                    data = data[:index]
486                if (status_cmd == "0"):
487                    status = 1
488        return (status, str(data))
489
490
491    def _dump_host(self):
492        self.host_dumper.create_dir("qemu")
493        self.logger.warning("Qemu ended unexpectedly, dump data from host"
494                " is in %s" % self.host_dumper.dump_dir)
495        self.host_dumper.dump_host()
496
497# This class is for reading data from a socket and passing it to logfunc
498# to be processed. It's completely event driven and has a straightforward
499# event loop. The mechanism for stopping the thread is a simple pipe which
500# will wake up the poll and allow for tearing everything down.
501class LoggingThread(threading.Thread):
502    def __init__(self, logfunc, sock, logger):
503        self.connection_established = threading.Event()
504        self.serversock = sock
505        self.logfunc = logfunc
506        self.logger = logger
507        self.readsock = None
508        self.running = False
509
510        self.errorevents = select.POLLERR | select.POLLHUP | select.POLLNVAL
511        self.readevents = select.POLLIN | select.POLLPRI
512
513        threading.Thread.__init__(self, target=self.threadtarget)
514
515    def threadtarget(self):
516        try:
517            self.eventloop()
518        finally:
519            self.teardown()
520
521    def run(self):
522        self.logger.debug("Starting logging thread")
523        self.readpipe, self.writepipe = os.pipe()
524        threading.Thread.run(self)
525
526    def stop(self):
527        self.logger.debug("Stopping logging thread")
528        if self.running:
529            os.write(self.writepipe, bytes("stop", "utf-8"))
530
531    def teardown(self):
532        self.logger.debug("Tearing down logging thread")
533        self.close_socket(self.serversock)
534
535        if self.readsock is not None:
536            self.close_socket(self.readsock)
537
538        self.close_ignore_error(self.readpipe)
539        self.close_ignore_error(self.writepipe)
540        self.running = False
541
542    def eventloop(self):
543        poll = select.poll()
544        event_read_mask = self.errorevents | self.readevents
545        poll.register(self.serversock.fileno())
546        poll.register(self.readpipe, event_read_mask)
547
548        breakout = False
549        self.running = True
550        self.logger.debug("Starting thread event loop")
551        while not breakout:
552            events = poll.poll()
553            for event in events:
554                # An error occurred, bail out
555                if event[1] & self.errorevents:
556                    raise Exception(self.stringify_event(event[1]))
557
558                # Event to stop the thread
559                if self.readpipe == event[0]:
560                    self.logger.debug("Stop event received")
561                    breakout = True
562                    break
563
564                # A connection request was received
565                elif self.serversock.fileno() == event[0]:
566                    self.logger.debug("Connection request received")
567                    self.readsock, _ = self.serversock.accept()
568                    self.readsock.setblocking(0)
569                    poll.unregister(self.serversock.fileno())
570                    poll.register(self.readsock.fileno(), event_read_mask)
571
572                    self.logger.debug("Setting connection established event")
573                    self.connection_established.set()
574
575                # Actual data to be logged
576                elif self.readsock.fileno() == event[0]:
577                    data = self.recv(1024)
578                    self.logfunc(data)
579
580    # Since the socket is non-blocking make sure to honor EAGAIN
581    # and EWOULDBLOCK.
582    def recv(self, count):
583        try:
584            data = self.readsock.recv(count)
585        except socket.error as e:
586            if e.errno == errno.EAGAIN or e.errno == errno.EWOULDBLOCK:
587                return ''
588            else:
589                raise
590
591        if data is None:
592            raise Exception("No data on read ready socket")
593        elif not data:
594            # This actually means an orderly shutdown
595            # happened. But for this code it counts as an
596            # error since the connection shouldn't go away
597            # until qemu exits.
598            raise Exception("Console connection closed unexpectedly")
599
600        return data
601
602    def stringify_event(self, event):
603        val = ''
604        if select.POLLERR == event:
605            val = 'POLLER'
606        elif select.POLLHUP == event:
607            val = 'POLLHUP'
608        elif select.POLLNVAL == event:
609            val = 'POLLNVAL'
610        return val
611
612    def close_socket(self, sock):
613        sock.shutdown(socket.SHUT_RDWR)
614        sock.close()
615
616    def close_ignore_error(self, fd):
617        try:
618            os.close(fd)
619        except OSError:
620            pass
621