1# Copyright (C) 2013 Intel Corporation
2#
3# Released under the MIT license (see COPYING.MIT)
4
5# This module provides a class for starting qemu images using runqemu.
6# It's used by testimage.bbclass.
7
8import subprocess
9import os
10import sys
11import time
12import signal
13import re
14import socket
15import select
16import errno
17import string
18import threading
19import codecs
20import logging
21from oeqa.utils.dump import HostDumper
22
23# Get Unicode non printable control chars
24control_range = list(range(0,32))+list(range(127,160))
25control_chars = [chr(x) for x in control_range
26                if chr(x) not in string.printable]
27re_control_char = re.compile('[%s]' % re.escape("".join(control_chars)))
28
29class QemuRunner:
30
31    def __init__(self, machine, rootfs, display, tmpdir, deploy_dir_image, logfile, boottime, dump_dir, dump_host_cmds,
32                 use_kvm, logger, use_slirp=False):
33
34        # Popen object for runqemu
35        self.runqemu = None
36        # pid of the qemu process that runqemu will start
37        self.qemupid = None
38        # target ip - from the command line or runqemu output
39        self.ip = None
40        # host ip - where qemu is running
41        self.server_ip = None
42        # target ip netmask
43        self.netmask = None
44
45        self.machine = machine
46        self.rootfs = rootfs
47        self.display = display
48        self.tmpdir = tmpdir
49        self.deploy_dir_image = deploy_dir_image
50        self.logfile = logfile
51        self.boottime = boottime
52        self.logged = False
53        self.thread = None
54        self.use_kvm = use_kvm
55        self.use_slirp = use_slirp
56        self.msg = ''
57
58        self.runqemutime = 120
59        self.qemu_pidfile = 'pidfile_'+str(os.getpid())
60        self.host_dumper = HostDumper(dump_host_cmds, dump_dir)
61
62        self.logger = logger
63
64    def create_socket(self):
65        try:
66            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
67            sock.setblocking(0)
68            sock.bind(("127.0.0.1",0))
69            sock.listen(2)
70            port = sock.getsockname()[1]
71            self.logger.debug("Created listening socket for qemu serial console on: 127.0.0.1:%s" % port)
72            return (sock, port)
73
74        except socket.error:
75            sock.close()
76            raise
77
78    def log(self, msg):
79        if self.logfile:
80            # It is needed to sanitize the data received from qemu
81            # because is possible to have control characters
82            msg = msg.decode("utf-8", errors='ignore')
83            msg = re_control_char.sub('', msg)
84            self.msg += msg
85            with codecs.open(self.logfile, "a", encoding="utf-8") as f:
86                f.write("%s" % msg)
87
88    def getOutput(self, o):
89        import fcntl
90        fl = fcntl.fcntl(o, fcntl.F_GETFL)
91        fcntl.fcntl(o, fcntl.F_SETFL, fl | os.O_NONBLOCK)
92        return os.read(o.fileno(), 1000000).decode("utf-8")
93
94
95    def handleSIGCHLD(self, signum, frame):
96        if self.runqemu and self.runqemu.poll():
97            if self.runqemu.returncode:
98                self.logger.debug('runqemu exited with code %d' % self.runqemu.returncode)
99                self.logger.debug("Output from runqemu:\n%s" % self.getOutput(self.runqemu.stdout))
100                self.stop()
101                self._dump_host()
102                raise SystemExit
103
104    def start(self, qemuparams = None, get_ip = True, extra_bootparams = None, runqemuparams='', launch_cmd=None, discard_writes=True):
105        env = os.environ.copy()
106        if self.display:
107            env["DISPLAY"] = self.display
108            # Set this flag so that Qemu doesn't do any grabs as SDL grabs
109            # interact badly with screensavers.
110            env["QEMU_DONT_GRAB"] = "1"
111        if not os.path.exists(self.rootfs):
112            self.logger.error("Invalid rootfs %s" % self.rootfs)
113            return False
114        if not os.path.exists(self.tmpdir):
115            self.logger.error("Invalid TMPDIR path %s" % self.tmpdir)
116            return False
117        else:
118            env["OE_TMPDIR"] = self.tmpdir
119        if not os.path.exists(self.deploy_dir_image):
120            self.logger.error("Invalid DEPLOY_DIR_IMAGE path %s" % self.deploy_dir_image)
121            return False
122        else:
123            env["DEPLOY_DIR_IMAGE"] = self.deploy_dir_image
124
125        if not launch_cmd:
126            launch_cmd = 'runqemu %s %s ' % ('snapshot' if discard_writes else '', runqemuparams)
127            if self.use_kvm:
128                self.logger.debug('Using kvm for runqemu')
129                launch_cmd += ' kvm'
130            else:
131                self.logger.debug('Not using kvm for runqemu')
132            if not self.display:
133                launch_cmd += ' nographic'
134            if self.use_slirp:
135                launch_cmd += ' slirp'
136            launch_cmd += ' %s %s' % (self.machine, self.rootfs)
137
138        return self.launch(launch_cmd, qemuparams=qemuparams, get_ip=get_ip, extra_bootparams=extra_bootparams, env=env)
139
140    def launch(self, launch_cmd, get_ip = True, qemuparams = None, extra_bootparams = None, env = None):
141        try:
142            self.threadsock, threadport = self.create_socket()
143            self.server_socket, self.serverport = self.create_socket()
144        except socket.error as msg:
145            self.logger.error("Failed to create listening socket: %s" % msg[1])
146            return False
147
148        bootparams = 'console=tty1 console=ttyS0,115200n8 printk.time=1'
149        if extra_bootparams:
150            bootparams = bootparams + ' ' + extra_bootparams
151
152        # Ask QEMU to store the QEMU process PID in file, this way we don't have to parse running processes
153        # and analyze descendents in order to determine it.
154        if os.path.exists(self.qemu_pidfile):
155            os.remove(self.qemu_pidfile)
156        self.qemuparams = 'bootparams="{0}" qemuparams="-serial tcp:127.0.0.1:{1} -pidfile {2}"'.format(bootparams, threadport, self.qemu_pidfile)
157        if qemuparams:
158            self.qemuparams = self.qemuparams[:-1] + " " + qemuparams + " " + '\"'
159
160        launch_cmd += ' tcpserial=%s %s' % (self.serverport, self.qemuparams)
161
162        self.origchldhandler = signal.getsignal(signal.SIGCHLD)
163        signal.signal(signal.SIGCHLD, self.handleSIGCHLD)
164
165        self.logger.debug('launchcmd=%s'%(launch_cmd))
166
167        # FIXME: We pass in stdin=subprocess.PIPE here to work around stty
168        # blocking at the end of the runqemu script when using this within
169        # oe-selftest (this makes stty error out immediately). There ought
170        # to be a proper fix but this will suffice for now.
171        self.runqemu = subprocess.Popen(launch_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE, preexec_fn=os.setpgrp, env=env)
172        output = self.runqemu.stdout
173
174        #
175        # We need the preexec_fn above so that all runqemu processes can easily be killed
176        # (by killing their process group). This presents a problem if this controlling
177        # process itself is killed however since those processes don't notice the death
178        # of the parent and merrily continue on.
179        #
180        # Rather than hack runqemu to deal with this, we add something here instead.
181        # Basically we fork off another process which holds an open pipe to the parent
182        # and also is setpgrp. If/when the pipe sees EOF from the parent dieing, it kills
183        # the process group. This is like pctrl's PDEATHSIG but for a process group
184        # rather than a single process.
185        #
186        r, w = os.pipe()
187        self.monitorpid = os.fork()
188        if self.monitorpid:
189            os.close(r)
190            self.monitorpipe = os.fdopen(w, "w")
191        else:
192            # child process
193            os.setpgrp()
194            os.close(w)
195            r = os.fdopen(r)
196            x = r.read()
197            os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM)
198            sys.exit(0)
199
200        self.logger.debug("runqemu started, pid is %s" % self.runqemu.pid)
201        self.logger.debug("waiting at most %s seconds for qemu pid (%s)" %
202                          (self.runqemutime, time.strftime("%D %H:%M:%S")))
203        endtime = time.time() + self.runqemutime
204        while not self.is_alive() and time.time() < endtime:
205            if self.runqemu.poll():
206                if self.runqemu.returncode:
207                    # No point waiting any longer
208                    self.logger.debug('runqemu exited with code %d' % self.runqemu.returncode)
209                    self._dump_host()
210                    self.logger.debug("Output from runqemu:\n%s" % self.getOutput(output))
211                    self.stop()
212                    return False
213            time.sleep(0.5)
214
215        if not self.is_alive():
216            self.logger.error("Qemu pid didn't appear in %s seconds (%s)" %
217                              (self.runqemutime, time.strftime("%D %H:%M:%S")))
218            # Dump all processes to help us to figure out what is going on...
219            ps = subprocess.Popen(['ps', 'axww', '-o', 'pid,ppid,command '], stdout=subprocess.PIPE).communicate()[0]
220            processes = ps.decode("utf-8")
221            self.logger.debug("Running processes:\n%s" % processes)
222            self._dump_host()
223            op = self.getOutput(output)
224            self.stop()
225            if op:
226                self.logger.error("Output from runqemu:\n%s" % op)
227            else:
228                self.logger.error("No output from runqemu.\n")
229            return False
230
231        # We are alive: qemu is running
232        out = self.getOutput(output)
233        netconf = False # network configuration is not required by default
234        self.logger.debug("qemu started in %s seconds - qemu procces pid is %s (%s)" %
235                          (time.time() - (endtime - self.runqemutime),
236                           self.qemupid, time.strftime("%D %H:%M:%S")))
237        if get_ip:
238            cmdline = ''
239            with open('/proc/%s/cmdline' % self.qemupid) as p:
240                cmdline = p.read()
241                # It is needed to sanitize the data received
242                # because is possible to have control characters
243                cmdline = re_control_char.sub(' ', cmdline)
244            try:
245                if self.use_slirp:
246                    tcp_ports = cmdline.split("hostfwd=tcp::")[1]
247                    host_port = tcp_ports[:tcp_ports.find('-')]
248                    self.ip = "localhost:%s" % host_port
249                else:
250                    ips = re.findall(r"((?:[0-9]{1,3}\.){3}[0-9]{1,3})", cmdline.split("ip=")[1])
251                    self.ip = ips[0]
252                    self.server_ip = ips[1]
253                self.logger.debug("qemu cmdline used:\n{}".format(cmdline))
254            except (IndexError, ValueError):
255                # Try to get network configuration from runqemu output
256                match = re.match(r'.*Network configuration: ([0-9.]+)::([0-9.]+):([0-9.]+)$.*',
257                                 out, re.MULTILINE|re.DOTALL)
258                if match:
259                    self.ip, self.server_ip, self.netmask = match.groups()
260                    # network configuration is required as we couldn't get it
261                    # from the runqemu command line, so qemu doesn't run kernel
262                    # and guest networking is not configured
263                    netconf = True
264                else:
265                    self.logger.error("Couldn't get ip from qemu command line and runqemu output! "
266                                 "Here is the qemu command line used:\n%s\n"
267                                 "and output from runqemu:\n%s" % (cmdline, out))
268                    self._dump_host()
269                    self.stop()
270                    return False
271
272        self.logger.debug("Target IP: %s" % self.ip)
273        self.logger.debug("Server IP: %s" % self.server_ip)
274
275        self.thread = LoggingThread(self.log, self.threadsock, self.logger)
276        self.thread.start()
277        if not self.thread.connection_established.wait(self.boottime):
278            self.logger.error("Didn't receive a console connection from qemu. "
279                         "Here is the qemu command line used:\n%s\nand "
280                         "output from runqemu:\n%s" % (cmdline, out))
281            self.stop_thread()
282            return False
283
284        self.logger.debug("Output from runqemu:\n%s", out)
285        self.logger.debug("Waiting at most %d seconds for login banner (%s)" %
286                          (self.boottime, time.strftime("%D %H:%M:%S")))
287        endtime = time.time() + self.boottime
288        socklist = [self.server_socket]
289        reachedlogin = False
290        stopread = False
291        qemusock = None
292        bootlog = b''
293        data = b''
294        while time.time() < endtime and not stopread:
295            try:
296                sread, swrite, serror = select.select(socklist, [], [], 5)
297            except InterruptedError:
298                continue
299            for sock in sread:
300                if sock is self.server_socket:
301                    qemusock, addr = self.server_socket.accept()
302                    qemusock.setblocking(0)
303                    socklist.append(qemusock)
304                    socklist.remove(self.server_socket)
305                    self.logger.debug("Connection from %s:%s" % addr)
306                else:
307                    data = data + sock.recv(1024)
308                    if data:
309                        bootlog += data
310                        data = b''
311                        if b' login:' in bootlog:
312                            self.server_socket = qemusock
313                            stopread = True
314                            reachedlogin = True
315                            self.logger.debug("Reached login banner in %s seconds (%s)" %
316                                              (time.time() - (endtime - self.boottime),
317                                              time.strftime("%D %H:%M:%S")))
318                    else:
319                        # no need to check if reachedlogin unless we support multiple connections
320                        self.logger.debug("QEMU socket disconnected before login banner reached. (%s)" %
321                                          time.strftime("%D %H:%M:%S"))
322                        socklist.remove(sock)
323                        sock.close()
324                        stopread = True
325
326
327        if not reachedlogin:
328            if time.time() >= endtime:
329                self.logger.debug("Target didn't reach login banner in %d seconds (%s)" %
330                                  (self.boottime, time.strftime("%D %H:%M:%S")))
331            tail = lambda l: "\n".join(l.splitlines()[-25:])
332            bootlog = bootlog.decode("utf-8")
333            # in case bootlog is empty, use tail qemu log store at self.msg
334            lines = tail(bootlog if bootlog else self.msg)
335            self.logger.debug("Last 25 lines of text:\n%s" % lines)
336            self.logger.debug("Check full boot log: %s" % self.logfile)
337            self._dump_host()
338            self.stop()
339            return False
340
341        # If we are not able to login the tests can continue
342        try:
343            (status, output) = self.run_serial("root\n", raw=True)
344            if re.search(r"root@[a-zA-Z0-9\-]+:~#", output):
345                self.logged = True
346                self.logger.debug("Logged as root in serial console")
347                if netconf:
348                    # configure guest networking
349                    cmd = "ifconfig eth0 %s netmask %s up\n" % (self.ip, self.netmask)
350                    output = self.run_serial(cmd, raw=True)[1]
351                    if re.search(r"root@[a-zA-Z0-9\-]+:~#", output):
352                        self.logger.debug("configured ip address %s", self.ip)
353                    else:
354                        self.logger.debug("Couldn't configure guest networking")
355            else:
356                self.logger.debug("Couldn't login into serial console"
357                            " as root using blank password")
358                self.logger.debug("The output:\n%s" % output)
359        except:
360            self.logger.debug("Serial console failed while trying to login")
361        return True
362
363    def stop(self):
364        if hasattr(self, "origchldhandler"):
365            signal.signal(signal.SIGCHLD, self.origchldhandler)
366        self.stop_thread()
367        self.stop_qemu_system()
368        if self.runqemu:
369            if hasattr(self, "monitorpid"):
370                os.kill(self.monitorpid, signal.SIGKILL)
371                self.logger.debug("Sending SIGTERM to runqemu")
372                try:
373                    os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM)
374                except OSError as e:
375                    if e.errno != errno.ESRCH:
376                        raise
377            endtime = time.time() + self.runqemutime
378            while self.runqemu.poll() is None and time.time() < endtime:
379                time.sleep(1)
380            if self.runqemu.poll() is None:
381                self.logger.debug("Sending SIGKILL to runqemu")
382                os.killpg(os.getpgid(self.runqemu.pid), signal.SIGKILL)
383            self.runqemu.stdin.close()
384            self.runqemu.stdout.close()
385            self.runqemu = None
386
387        if hasattr(self, 'server_socket') and self.server_socket:
388            self.server_socket.close()
389            self.server_socket = None
390        if hasattr(self, 'threadsock') and self.threadsock:
391            self.threadsock.close()
392            self.threadsock = None
393        self.qemupid = None
394        self.ip = None
395        if os.path.exists(self.qemu_pidfile):
396            os.remove(self.qemu_pidfile)
397        if self.monitorpipe:
398            self.monitorpipe.close()
399
400    def stop_qemu_system(self):
401        if self.qemupid:
402            try:
403                # qemu-system behaves well and a SIGTERM is enough
404                os.kill(self.qemupid, signal.SIGTERM)
405            except ProcessLookupError as e:
406                self.logger.warning('qemu-system ended unexpectedly')
407
408    def stop_thread(self):
409        if self.thread and self.thread.is_alive():
410            self.thread.stop()
411            self.thread.join()
412
413    def restart(self, qemuparams = None):
414        self.logger.debug("Restarting qemu process")
415        if self.runqemu.poll() is None:
416            self.stop()
417        if self.start(qemuparams):
418            return True
419        return False
420
421    def is_alive(self):
422        if not self.runqemu or self.runqemu.poll() is not None:
423            return False
424        if os.path.isfile(self.qemu_pidfile):
425            f = open(self.qemu_pidfile, 'r')
426            qemu_pid = f.read()
427            f.close()
428            qemupid = int(qemu_pid)
429            if os.path.exists("/proc/" + str(qemupid)):
430                self.qemupid = qemupid
431                return True
432        return False
433
434    def run_serial(self, command, raw=False, timeout=60):
435        # We assume target system have echo to get command status
436        if not raw:
437            command = "%s; echo $?\n" % command
438
439        data = ''
440        status = 0
441        self.server_socket.sendall(command.encode('utf-8'))
442        start = time.time()
443        end = start + timeout
444        while True:
445            now = time.time()
446            if now >= end:
447                data += "<<< run_serial(): command timed out after %d seconds without output >>>\r\n\r\n" % timeout
448                break
449            try:
450                sread, _, _ = select.select([self.server_socket],[],[], end - now)
451            except InterruptedError:
452                continue
453            if sread:
454                answer = self.server_socket.recv(1024)
455                if answer:
456                    data += answer.decode('utf-8')
457                    # Search the prompt to stop
458                    if re.search(r"[a-zA-Z0-9]+@[a-zA-Z0-9\-]+:~#", data):
459                        break
460                else:
461                    raise Exception("No data on serial console socket")
462
463        if data:
464            if raw:
465                status = 1
466            else:
467                # Remove first line (command line) and last line (prompt)
468                data = data[data.find('$?\r\n')+4:data.rfind('\r\n')]
469                index = data.rfind('\r\n')
470                if index == -1:
471                    status_cmd = data
472                    data = ""
473                else:
474                    status_cmd = data[index+2:]
475                    data = data[:index]
476                if (status_cmd == "0"):
477                    status = 1
478        return (status, str(data))
479
480
481    def _dump_host(self):
482        self.host_dumper.create_dir("qemu")
483        self.logger.warning("Qemu ended unexpectedly, dump data from host"
484                " is in %s" % self.host_dumper.dump_dir)
485        self.host_dumper.dump_host()
486
487# This class is for reading data from a socket and passing it to logfunc
488# to be processed. It's completely event driven and has a straightforward
489# event loop. The mechanism for stopping the thread is a simple pipe which
490# will wake up the poll and allow for tearing everything down.
491class LoggingThread(threading.Thread):
492    def __init__(self, logfunc, sock, logger):
493        self.connection_established = threading.Event()
494        self.serversock = sock
495        self.logfunc = logfunc
496        self.logger = logger
497        self.readsock = None
498        self.running = False
499
500        self.errorevents = select.POLLERR | select.POLLHUP | select.POLLNVAL
501        self.readevents = select.POLLIN | select.POLLPRI
502
503        threading.Thread.__init__(self, target=self.threadtarget)
504
505    def threadtarget(self):
506        try:
507            self.eventloop()
508        finally:
509            self.teardown()
510
511    def run(self):
512        self.logger.debug("Starting logging thread")
513        self.readpipe, self.writepipe = os.pipe()
514        threading.Thread.run(self)
515
516    def stop(self):
517        self.logger.debug("Stopping logging thread")
518        if self.running:
519            os.write(self.writepipe, bytes("stop", "utf-8"))
520
521    def teardown(self):
522        self.logger.debug("Tearing down logging thread")
523        self.close_socket(self.serversock)
524
525        if self.readsock is not None:
526            self.close_socket(self.readsock)
527
528        self.close_ignore_error(self.readpipe)
529        self.close_ignore_error(self.writepipe)
530        self.running = False
531
532    def eventloop(self):
533        poll = select.poll()
534        event_read_mask = self.errorevents | self.readevents
535        poll.register(self.serversock.fileno())
536        poll.register(self.readpipe, event_read_mask)
537
538        breakout = False
539        self.running = True
540        self.logger.debug("Starting thread event loop")
541        while not breakout:
542            events = poll.poll()
543            for event in events:
544                # An error occurred, bail out
545                if event[1] & self.errorevents:
546                    raise Exception(self.stringify_event(event[1]))
547
548                # Event to stop the thread
549                if self.readpipe == event[0]:
550                    self.logger.debug("Stop event received")
551                    breakout = True
552                    break
553
554                # A connection request was received
555                elif self.serversock.fileno() == event[0]:
556                    self.logger.debug("Connection request received")
557                    self.readsock, _ = self.serversock.accept()
558                    self.readsock.setblocking(0)
559                    poll.unregister(self.serversock.fileno())
560                    poll.register(self.readsock.fileno(), event_read_mask)
561
562                    self.logger.debug("Setting connection established event")
563                    self.connection_established.set()
564
565                # Actual data to be logged
566                elif self.readsock.fileno() == event[0]:
567                    data = self.recv(1024)
568                    self.logfunc(data)
569
570    # Since the socket is non-blocking make sure to honor EAGAIN
571    # and EWOULDBLOCK.
572    def recv(self, count):
573        try:
574            data = self.readsock.recv(count)
575        except socket.error as e:
576            if e.errno == errno.EAGAIN or e.errno == errno.EWOULDBLOCK:
577                return ''
578            else:
579                raise
580
581        if data is None:
582            raise Exception("No data on read ready socket")
583        elif not data:
584            # This actually means an orderly shutdown
585            # happened. But for this code it counts as an
586            # error since the connection shouldn't go away
587            # until qemu exits.
588            raise Exception("Console connection closed unexpectedly")
589
590        return data
591
592    def stringify_event(self, event):
593        val = ''
594        if select.POLLERR == event:
595            val = 'POLLER'
596        elif select.POLLHUP == event:
597            val = 'POLLHUP'
598        elif select.POLLNVAL == event:
599            val = 'POLLNVAL'
600        return val
601
602    def close_socket(self, sock):
603        sock.shutdown(socket.SHUT_RDWR)
604        sock.close()
605
606    def close_ignore_error(self, fd):
607        try:
608            os.close(fd)
609        except OSError:
610            pass
611