1#
2# Copyright (C) 2013 Intel Corporation
3#
4# SPDX-License-Identifier: MIT
5#
6
7# This module provides a class for starting qemu images using runqemu.
8# It's used by testimage.bbclass.
9
10import subprocess
11import os
12import sys
13import time
14import signal
15import re
16import socket
17import select
18import errno
19import string
20import threading
21import codecs
22import logging
23from oeqa.utils.dump import HostDumper
24
25# Get Unicode non printable control chars
26control_range = list(range(0,32))+list(range(127,160))
27control_chars = [chr(x) for x in control_range
28                if chr(x) not in string.printable]
29re_control_char = re.compile('[%s]' % re.escape("".join(control_chars)))
30
31class QemuRunner:
32
33    def __init__(self, machine, rootfs, display, tmpdir, deploy_dir_image, logfile, boottime, dump_dir, dump_host_cmds,
34                 use_kvm, logger, use_slirp=False):
35
36        # Popen object for runqemu
37        self.runqemu = None
38        # pid of the qemu process that runqemu will start
39        self.qemupid = None
40        # target ip - from the command line or runqemu output
41        self.ip = None
42        # host ip - where qemu is running
43        self.server_ip = None
44        # target ip netmask
45        self.netmask = None
46
47        self.machine = machine
48        self.rootfs = rootfs
49        self.display = display
50        self.tmpdir = tmpdir
51        self.deploy_dir_image = deploy_dir_image
52        self.logfile = logfile
53        self.boottime = boottime
54        self.logged = False
55        self.thread = None
56        self.use_kvm = use_kvm
57        self.use_slirp = use_slirp
58        self.msg = ''
59
60        self.runqemutime = 120
61        self.qemu_pidfile = 'pidfile_'+str(os.getpid())
62        self.host_dumper = HostDumper(dump_host_cmds, dump_dir)
63
64        self.logger = logger
65
66    def create_socket(self):
67        try:
68            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
69            sock.setblocking(0)
70            sock.bind(("127.0.0.1",0))
71            sock.listen(2)
72            port = sock.getsockname()[1]
73            self.logger.debug("Created listening socket for qemu serial console on: 127.0.0.1:%s" % port)
74            return (sock, port)
75
76        except socket.error:
77            sock.close()
78            raise
79
80    def log(self, msg):
81        if self.logfile:
82            # It is needed to sanitize the data received from qemu
83            # because is possible to have control characters
84            msg = msg.decode("utf-8", errors='ignore')
85            msg = re_control_char.sub('', msg)
86            self.msg += msg
87            with codecs.open(self.logfile, "a", encoding="utf-8") as f:
88                f.write("%s" % msg)
89
90    def getOutput(self, o):
91        import fcntl
92        fl = fcntl.fcntl(o, fcntl.F_GETFL)
93        fcntl.fcntl(o, fcntl.F_SETFL, fl | os.O_NONBLOCK)
94        return os.read(o.fileno(), 1000000).decode("utf-8")
95
96
97    def handleSIGCHLD(self, signum, frame):
98        if self.runqemu and self.runqemu.poll():
99            if self.runqemu.returncode:
100                self.logger.debug('runqemu exited with code %d' % self.runqemu.returncode)
101                self.logger.debug("Output from runqemu:\n%s" % self.getOutput(self.runqemu.stdout))
102                self.stop()
103                self._dump_host()
104                raise SystemExit
105
106    def start(self, qemuparams = None, get_ip = True, extra_bootparams = None, runqemuparams='', launch_cmd=None, discard_writes=True):
107        env = os.environ.copy()
108        if self.display:
109            env["DISPLAY"] = self.display
110            # Set this flag so that Qemu doesn't do any grabs as SDL grabs
111            # interact badly with screensavers.
112            env["QEMU_DONT_GRAB"] = "1"
113        if not os.path.exists(self.rootfs):
114            self.logger.error("Invalid rootfs %s" % self.rootfs)
115            return False
116        if not os.path.exists(self.tmpdir):
117            self.logger.error("Invalid TMPDIR path %s" % self.tmpdir)
118            return False
119        else:
120            env["OE_TMPDIR"] = self.tmpdir
121        if not os.path.exists(self.deploy_dir_image):
122            self.logger.error("Invalid DEPLOY_DIR_IMAGE path %s" % self.deploy_dir_image)
123            return False
124        else:
125            env["DEPLOY_DIR_IMAGE"] = self.deploy_dir_image
126
127        if not launch_cmd:
128            launch_cmd = 'runqemu %s %s ' % ('snapshot' if discard_writes else '', runqemuparams)
129            if self.use_kvm:
130                self.logger.debug('Using kvm for runqemu')
131                launch_cmd += ' kvm'
132            else:
133                self.logger.debug('Not using kvm for runqemu')
134            if not self.display:
135                launch_cmd += ' nographic'
136            if self.use_slirp:
137                launch_cmd += ' slirp'
138            launch_cmd += ' %s %s' % (self.machine, self.rootfs)
139
140        return self.launch(launch_cmd, qemuparams=qemuparams, get_ip=get_ip, extra_bootparams=extra_bootparams, env=env)
141
142    def launch(self, launch_cmd, get_ip = True, qemuparams = None, extra_bootparams = None, env = None):
143        try:
144            self.threadsock, threadport = self.create_socket()
145            self.server_socket, self.serverport = self.create_socket()
146        except socket.error as msg:
147            self.logger.error("Failed to create listening socket: %s" % msg[1])
148            return False
149
150        bootparams = 'console=tty1 console=ttyS0,115200n8 printk.time=1'
151        if extra_bootparams:
152            bootparams = bootparams + ' ' + extra_bootparams
153
154        # Ask QEMU to store the QEMU process PID in file, this way we don't have to parse running processes
155        # and analyze descendents in order to determine it.
156        if os.path.exists(self.qemu_pidfile):
157            os.remove(self.qemu_pidfile)
158        self.qemuparams = 'bootparams="{0}" qemuparams="-serial tcp:127.0.0.1:{1} -pidfile {2}"'.format(bootparams, threadport, self.qemu_pidfile)
159        if qemuparams:
160            self.qemuparams = self.qemuparams[:-1] + " " + qemuparams + " " + '\"'
161
162        launch_cmd += ' tcpserial=%s %s' % (self.serverport, self.qemuparams)
163
164        self.origchldhandler = signal.getsignal(signal.SIGCHLD)
165        signal.signal(signal.SIGCHLD, self.handleSIGCHLD)
166
167        self.logger.debug('launchcmd=%s'%(launch_cmd))
168
169        # FIXME: We pass in stdin=subprocess.PIPE here to work around stty
170        # blocking at the end of the runqemu script when using this within
171        # oe-selftest (this makes stty error out immediately). There ought
172        # to be a proper fix but this will suffice for now.
173        self.runqemu = subprocess.Popen(launch_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE, preexec_fn=os.setpgrp, env=env)
174        output = self.runqemu.stdout
175
176        #
177        # We need the preexec_fn above so that all runqemu processes can easily be killed
178        # (by killing their process group). This presents a problem if this controlling
179        # process itself is killed however since those processes don't notice the death
180        # of the parent and merrily continue on.
181        #
182        # Rather than hack runqemu to deal with this, we add something here instead.
183        # Basically we fork off another process which holds an open pipe to the parent
184        # and also is setpgrp. If/when the pipe sees EOF from the parent dieing, it kills
185        # the process group. This is like pctrl's PDEATHSIG but for a process group
186        # rather than a single process.
187        #
188        r, w = os.pipe()
189        self.monitorpid = os.fork()
190        if self.monitorpid:
191            os.close(r)
192            self.monitorpipe = os.fdopen(w, "w")
193        else:
194            # child process
195            os.setpgrp()
196            os.close(w)
197            r = os.fdopen(r)
198            x = r.read()
199            os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM)
200            sys.exit(0)
201
202        self.logger.debug("runqemu started, pid is %s" % self.runqemu.pid)
203        self.logger.debug("waiting at most %s seconds for qemu pid (%s)" %
204                          (self.runqemutime, time.strftime("%D %H:%M:%S")))
205        endtime = time.time() + self.runqemutime
206        while not self.is_alive() and time.time() < endtime:
207            if self.runqemu.poll():
208                if self.runqemu.returncode:
209                    # No point waiting any longer
210                    self.logger.debug('runqemu exited with code %d' % self.runqemu.returncode)
211                    self._dump_host()
212                    self.logger.debug("Output from runqemu:\n%s" % self.getOutput(output))
213                    self.stop()
214                    return False
215            time.sleep(0.5)
216
217        if not self.is_alive():
218            self.logger.error("Qemu pid didn't appear in %s seconds (%s)" %
219                              (self.runqemutime, time.strftime("%D %H:%M:%S")))
220            # Dump all processes to help us to figure out what is going on...
221            ps = subprocess.Popen(['ps', 'axww', '-o', 'pid,ppid,command '], stdout=subprocess.PIPE).communicate()[0]
222            processes = ps.decode("utf-8")
223            self.logger.debug("Running processes:\n%s" % processes)
224            self._dump_host()
225            op = self.getOutput(output)
226            self.stop()
227            if op:
228                self.logger.error("Output from runqemu:\n%s" % op)
229            else:
230                self.logger.error("No output from runqemu.\n")
231            return False
232
233        # We are alive: qemu is running
234        out = self.getOutput(output)
235        netconf = False # network configuration is not required by default
236        self.logger.debug("qemu started in %s seconds - qemu procces pid is %s (%s)" %
237                          (time.time() - (endtime - self.runqemutime),
238                           self.qemupid, time.strftime("%D %H:%M:%S")))
239        if get_ip:
240            cmdline = ''
241            with open('/proc/%s/cmdline' % self.qemupid) as p:
242                cmdline = p.read()
243                # It is needed to sanitize the data received
244                # because is possible to have control characters
245                cmdline = re_control_char.sub(' ', cmdline)
246            try:
247                if self.use_slirp:
248                    tcp_ports = cmdline.split("hostfwd=tcp::")[1]
249                    host_port = tcp_ports[:tcp_ports.find('-')]
250                    self.ip = "localhost:%s" % host_port
251                else:
252                    ips = re.findall(r"((?:[0-9]{1,3}\.){3}[0-9]{1,3})", cmdline.split("ip=")[1])
253                    self.ip = ips[0]
254                    self.server_ip = ips[1]
255                self.logger.debug("qemu cmdline used:\n{}".format(cmdline))
256            except (IndexError, ValueError):
257                # Try to get network configuration from runqemu output
258                match = re.match(r'.*Network configuration: ([0-9.]+)::([0-9.]+):([0-9.]+)$.*',
259                                 out, re.MULTILINE|re.DOTALL)
260                if match:
261                    self.ip, self.server_ip, self.netmask = match.groups()
262                    # network configuration is required as we couldn't get it
263                    # from the runqemu command line, so qemu doesn't run kernel
264                    # and guest networking is not configured
265                    netconf = True
266                else:
267                    self.logger.error("Couldn't get ip from qemu command line and runqemu output! "
268                                 "Here is the qemu command line used:\n%s\n"
269                                 "and output from runqemu:\n%s" % (cmdline, out))
270                    self._dump_host()
271                    self.stop()
272                    return False
273
274        self.logger.debug("Target IP: %s" % self.ip)
275        self.logger.debug("Server IP: %s" % self.server_ip)
276
277        self.thread = LoggingThread(self.log, self.threadsock, self.logger)
278        self.thread.start()
279        if not self.thread.connection_established.wait(self.boottime):
280            self.logger.error("Didn't receive a console connection from qemu. "
281                         "Here is the qemu command line used:\n%s\nand "
282                         "output from runqemu:\n%s" % (cmdline, out))
283            self.stop_thread()
284            return False
285
286        self.logger.debug("Output from runqemu:\n%s", out)
287        self.logger.debug("Waiting at most %d seconds for login banner (%s)" %
288                          (self.boottime, time.strftime("%D %H:%M:%S")))
289        endtime = time.time() + self.boottime
290        socklist = [self.server_socket]
291        reachedlogin = False
292        stopread = False
293        qemusock = None
294        bootlog = b''
295        data = b''
296        while time.time() < endtime and not stopread:
297            try:
298                sread, swrite, serror = select.select(socklist, [], [], 5)
299            except InterruptedError:
300                continue
301            for sock in sread:
302                if sock is self.server_socket:
303                    qemusock, addr = self.server_socket.accept()
304                    qemusock.setblocking(0)
305                    socklist.append(qemusock)
306                    socklist.remove(self.server_socket)
307                    self.logger.debug("Connection from %s:%s" % addr)
308                else:
309                    data = data + sock.recv(1024)
310                    if data:
311                        bootlog += data
312                        data = b''
313                        if b' login:' in bootlog:
314                            self.server_socket = qemusock
315                            stopread = True
316                            reachedlogin = True
317                            self.logger.debug("Reached login banner in %s seconds (%s)" %
318                                              (time.time() - (endtime - self.boottime),
319                                              time.strftime("%D %H:%M:%S")))
320                    else:
321                        # no need to check if reachedlogin unless we support multiple connections
322                        self.logger.debug("QEMU socket disconnected before login banner reached. (%s)" %
323                                          time.strftime("%D %H:%M:%S"))
324                        socklist.remove(sock)
325                        sock.close()
326                        stopread = True
327
328
329        if not reachedlogin:
330            if time.time() >= endtime:
331                self.logger.debug("Target didn't reach login banner in %d seconds (%s)" %
332                                  (self.boottime, time.strftime("%D %H:%M:%S")))
333            tail = lambda l: "\n".join(l.splitlines()[-25:])
334            bootlog = bootlog.decode("utf-8")
335            # in case bootlog is empty, use tail qemu log store at self.msg
336            lines = tail(bootlog if bootlog else self.msg)
337            self.logger.debug("Last 25 lines of text:\n%s" % lines)
338            self.logger.debug("Check full boot log: %s" % self.logfile)
339            self._dump_host()
340            self.stop()
341            return False
342
343        # If we are not able to login the tests can continue
344        try:
345            (status, output) = self.run_serial("root\n", raw=True)
346            if re.search(r"root@[a-zA-Z0-9\-]+:~#", output):
347                self.logged = True
348                self.logger.debug("Logged as root in serial console")
349                if netconf:
350                    # configure guest networking
351                    cmd = "ifconfig eth0 %s netmask %s up\n" % (self.ip, self.netmask)
352                    output = self.run_serial(cmd, raw=True)[1]
353                    if re.search(r"root@[a-zA-Z0-9\-]+:~#", output):
354                        self.logger.debug("configured ip address %s", self.ip)
355                    else:
356                        self.logger.debug("Couldn't configure guest networking")
357            else:
358                self.logger.debug("Couldn't login into serial console"
359                            " as root using blank password")
360                self.logger.debug("The output:\n%s" % output)
361        except:
362            self.logger.debug("Serial console failed while trying to login")
363        return True
364
365    def stop(self):
366        if hasattr(self, "origchldhandler"):
367            signal.signal(signal.SIGCHLD, self.origchldhandler)
368        self.stop_thread()
369        self.stop_qemu_system()
370        if self.runqemu:
371            if hasattr(self, "monitorpid"):
372                os.kill(self.monitorpid, signal.SIGKILL)
373                self.logger.debug("Sending SIGTERM to runqemu")
374                try:
375                    os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM)
376                except OSError as e:
377                    if e.errno != errno.ESRCH:
378                        raise
379            endtime = time.time() + self.runqemutime
380            while self.runqemu.poll() is None and time.time() < endtime:
381                time.sleep(1)
382            if self.runqemu.poll() is None:
383                self.logger.debug("Sending SIGKILL to runqemu")
384                os.killpg(os.getpgid(self.runqemu.pid), signal.SIGKILL)
385            self.runqemu.stdin.close()
386            self.runqemu.stdout.close()
387            self.runqemu = None
388
389        if hasattr(self, 'server_socket') and self.server_socket:
390            self.server_socket.close()
391            self.server_socket = None
392        if hasattr(self, 'threadsock') and self.threadsock:
393            self.threadsock.close()
394            self.threadsock = None
395        self.qemupid = None
396        self.ip = None
397        if os.path.exists(self.qemu_pidfile):
398            os.remove(self.qemu_pidfile)
399        if self.monitorpipe:
400            self.monitorpipe.close()
401
402    def stop_qemu_system(self):
403        if self.qemupid:
404            try:
405                # qemu-system behaves well and a SIGTERM is enough
406                os.kill(self.qemupid, signal.SIGTERM)
407            except ProcessLookupError as e:
408                self.logger.warning('qemu-system ended unexpectedly')
409
410    def stop_thread(self):
411        if self.thread and self.thread.is_alive():
412            self.thread.stop()
413            self.thread.join()
414
415    def restart(self, qemuparams = None):
416        self.logger.debug("Restarting qemu process")
417        if self.runqemu.poll() is None:
418            self.stop()
419        if self.start(qemuparams):
420            return True
421        return False
422
423    def is_alive(self):
424        if not self.runqemu or self.runqemu.poll() is not None:
425            return False
426        if os.path.isfile(self.qemu_pidfile):
427            f = open(self.qemu_pidfile, 'r')
428            qemu_pid = f.read()
429            f.close()
430            qemupid = int(qemu_pid)
431            if os.path.exists("/proc/" + str(qemupid)):
432                self.qemupid = qemupid
433                return True
434        return False
435
436    def run_serial(self, command, raw=False, timeout=60):
437        # We assume target system have echo to get command status
438        if not raw:
439            command = "%s; echo $?\n" % command
440
441        data = ''
442        status = 0
443        self.server_socket.sendall(command.encode('utf-8'))
444        start = time.time()
445        end = start + timeout
446        while True:
447            now = time.time()
448            if now >= end:
449                data += "<<< run_serial(): command timed out after %d seconds without output >>>\r\n\r\n" % timeout
450                break
451            try:
452                sread, _, _ = select.select([self.server_socket],[],[], end - now)
453            except InterruptedError:
454                continue
455            if sread:
456                answer = self.server_socket.recv(1024)
457                if answer:
458                    data += answer.decode('utf-8')
459                    # Search the prompt to stop
460                    if re.search(r"[a-zA-Z0-9]+@[a-zA-Z0-9\-]+:~#", data):
461                        break
462                else:
463                    raise Exception("No data on serial console socket")
464
465        if data:
466            if raw:
467                status = 1
468            else:
469                # Remove first line (command line) and last line (prompt)
470                data = data[data.find('$?\r\n')+4:data.rfind('\r\n')]
471                index = data.rfind('\r\n')
472                if index == -1:
473                    status_cmd = data
474                    data = ""
475                else:
476                    status_cmd = data[index+2:]
477                    data = data[:index]
478                if (status_cmd == "0"):
479                    status = 1
480        return (status, str(data))
481
482
483    def _dump_host(self):
484        self.host_dumper.create_dir("qemu")
485        self.logger.warning("Qemu ended unexpectedly, dump data from host"
486                " is in %s" % self.host_dumper.dump_dir)
487        self.host_dumper.dump_host()
488
489# This class is for reading data from a socket and passing it to logfunc
490# to be processed. It's completely event driven and has a straightforward
491# event loop. The mechanism for stopping the thread is a simple pipe which
492# will wake up the poll and allow for tearing everything down.
493class LoggingThread(threading.Thread):
494    def __init__(self, logfunc, sock, logger):
495        self.connection_established = threading.Event()
496        self.serversock = sock
497        self.logfunc = logfunc
498        self.logger = logger
499        self.readsock = None
500        self.running = False
501
502        self.errorevents = select.POLLERR | select.POLLHUP | select.POLLNVAL
503        self.readevents = select.POLLIN | select.POLLPRI
504
505        threading.Thread.__init__(self, target=self.threadtarget)
506
507    def threadtarget(self):
508        try:
509            self.eventloop()
510        finally:
511            self.teardown()
512
513    def run(self):
514        self.logger.debug("Starting logging thread")
515        self.readpipe, self.writepipe = os.pipe()
516        threading.Thread.run(self)
517
518    def stop(self):
519        self.logger.debug("Stopping logging thread")
520        if self.running:
521            os.write(self.writepipe, bytes("stop", "utf-8"))
522
523    def teardown(self):
524        self.logger.debug("Tearing down logging thread")
525        self.close_socket(self.serversock)
526
527        if self.readsock is not None:
528            self.close_socket(self.readsock)
529
530        self.close_ignore_error(self.readpipe)
531        self.close_ignore_error(self.writepipe)
532        self.running = False
533
534    def eventloop(self):
535        poll = select.poll()
536        event_read_mask = self.errorevents | self.readevents
537        poll.register(self.serversock.fileno())
538        poll.register(self.readpipe, event_read_mask)
539
540        breakout = False
541        self.running = True
542        self.logger.debug("Starting thread event loop")
543        while not breakout:
544            events = poll.poll()
545            for event in events:
546                # An error occurred, bail out
547                if event[1] & self.errorevents:
548                    raise Exception(self.stringify_event(event[1]))
549
550                # Event to stop the thread
551                if self.readpipe == event[0]:
552                    self.logger.debug("Stop event received")
553                    breakout = True
554                    break
555
556                # A connection request was received
557                elif self.serversock.fileno() == event[0]:
558                    self.logger.debug("Connection request received")
559                    self.readsock, _ = self.serversock.accept()
560                    self.readsock.setblocking(0)
561                    poll.unregister(self.serversock.fileno())
562                    poll.register(self.readsock.fileno(), event_read_mask)
563
564                    self.logger.debug("Setting connection established event")
565                    self.connection_established.set()
566
567                # Actual data to be logged
568                elif self.readsock.fileno() == event[0]:
569                    data = self.recv(1024)
570                    self.logfunc(data)
571
572    # Since the socket is non-blocking make sure to honor EAGAIN
573    # and EWOULDBLOCK.
574    def recv(self, count):
575        try:
576            data = self.readsock.recv(count)
577        except socket.error as e:
578            if e.errno == errno.EAGAIN or e.errno == errno.EWOULDBLOCK:
579                return ''
580            else:
581                raise
582
583        if data is None:
584            raise Exception("No data on read ready socket")
585        elif not data:
586            # This actually means an orderly shutdown
587            # happened. But for this code it counts as an
588            # error since the connection shouldn't go away
589            # until qemu exits.
590            raise Exception("Console connection closed unexpectedly")
591
592        return data
593
594    def stringify_event(self, event):
595        val = ''
596        if select.POLLERR == event:
597            val = 'POLLER'
598        elif select.POLLHUP == event:
599            val = 'POLLHUP'
600        elif select.POLLNVAL == event:
601            val = 'POLLNVAL'
602        return val
603
604    def close_socket(self, sock):
605        sock.shutdown(socket.SHUT_RDWR)
606        sock.close()
607
608    def close_ignore_error(self, fd):
609        try:
610            os.close(fd)
611        except OSError:
612            pass
613