1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3
4"""
5This takes a crashing qtest trace and tries to remove superflous operations
6"""
7
8import sys
9import os
10import subprocess
11import time
12import struct
13
14QEMU_ARGS = None
15QEMU_PATH = None
16TIMEOUT = 5
17CRASH_TOKEN = None
18
19# Minimization levels
20M1 = False # try removing IO commands iteratively
21M2 = False # try setting bits in operand of write/out to zero
22
23write_suffix_lookup = {"b": (1, "B"),
24                       "w": (2, "H"),
25                       "l": (4, "L"),
26                       "q": (8, "Q")}
27
28def usage():
29    sys.exit("""\
30Usage:
31
32QEMU_PATH="/path/to/qemu" QEMU_ARGS="args" {} [Options] input_trace output_trace
33
34By default, will try to use the second-to-last line in the output to identify
35whether the crash occred. Optionally, manually set a string that idenitifes the
36crash by setting CRASH_TOKEN=
37
38Options:
39
40-M1: enable a loop around the remove minimizer, which may help decrease some
41     timing dependant instructions. Off by default.
42-M2: try setting bits in operand of write/out to zero. Off by default.
43
44""".format((sys.argv[0])))
45
46deduplication_note = """\n\
47Note: While trimming the input, sometimes the mutated trace triggers a different
48type crash but indicates the same bug. Under this situation, our minimizer is
49incapable of recognizing and stopped from removing it. In the future, we may
50use a more sophisticated crash case deduplication method.
51\n"""
52
53def check_if_trace_crashes(trace, path):
54    with open(path, "w") as tracefile:
55        tracefile.write("".join(trace))
56
57    rc = subprocess.Popen("timeout -s 9 {timeout}s {qemu_path} {qemu_args} 2>&1\
58    < {trace_path}".format(timeout=TIMEOUT,
59                           qemu_path=QEMU_PATH,
60                           qemu_args=QEMU_ARGS,
61                           trace_path=path),
62                          shell=True,
63                          stdin=subprocess.PIPE,
64                          stdout=subprocess.PIPE,
65                          encoding="utf-8")
66    global CRASH_TOKEN
67    if CRASH_TOKEN is None:
68        try:
69            outs, _ = rc.communicate(timeout=5)
70            CRASH_TOKEN = " ".join(outs.splitlines()[-2].split()[0:3])
71        except subprocess.TimeoutExpired:
72            print("subprocess.TimeoutExpired")
73            return False
74        print("Identifying Crashes by this string: {}".format(CRASH_TOKEN))
75        global deduplication_note
76        print(deduplication_note)
77        return True
78
79    for line in iter(rc.stdout.readline, ""):
80        if "CLOSED" in line:
81            return False
82        if CRASH_TOKEN in line:
83            return True
84
85    print("\nWarning:")
86    print("  There is no 'CLOSED'or CRASH_TOKEN in the stdout of subprocess.")
87    print("  Usually this indicates a different type of crash.\n")
88    return False
89
90
91def remove_lines(newtrace, outpath):
92    remove_step = 1
93    i = 0
94    while i < len(newtrace):
95        # 1.) Try to remove lines completely and reproduce the crash.
96        # If it works, we're done.
97        if (i+remove_step) >= len(newtrace):
98            remove_step = 1
99        prior = newtrace[i:i+remove_step]
100        for j in range(i, i+remove_step):
101            newtrace[j] = ""
102        print("Removing {lines} ...\n".format(lines=prior))
103        if check_if_trace_crashes(newtrace, outpath):
104            i += remove_step
105            # Double the number of lines to remove for next round
106            remove_step *= 2
107            continue
108        # Failed to remove multiple IOs, fast recovery
109        if remove_step > 1:
110            for j in range(i, i+remove_step):
111                newtrace[j] = prior[j-i]
112            remove_step = 1
113            continue
114        newtrace[i] = prior[0] # remove_step = 1
115
116        # 2.) Try to replace write{bwlq} commands with a write addr, len
117        # command. Since this can require swapping endianness, try both LE and
118        # BE options. We do this, so we can "trim" the writes in (3)
119
120        if (newtrace[i].startswith("write") and not
121            newtrace[i].startswith("write ")):
122            suffix = newtrace[i].split()[0][-1]
123            assert(suffix in write_suffix_lookup)
124            addr = int(newtrace[i].split()[1], 16)
125            value = int(newtrace[i].split()[2], 16)
126            for endianness in ['<', '>']:
127                data = struct.pack("{end}{size}".format(end=endianness,
128                                   size=write_suffix_lookup[suffix][1]),
129                                   value)
130                newtrace[i] = "write {addr} {size} 0x{data}\n".format(
131                    addr=hex(addr),
132                    size=hex(write_suffix_lookup[suffix][0]),
133                    data=data.hex())
134                if(check_if_trace_crashes(newtrace, outpath)):
135                    break
136            else:
137                newtrace[i] = prior[0]
138
139        # 3.) If it is a qtest write command: write addr len data, try to split
140        # it into two separate write commands. If splitting the data operand
141        # from length/2^n bytes to the left does not work, try to move the pivot
142        # to the right side, then add one to n, until length/2^n == 0. The idea
143        # is to prune unneccessary bytes from long writes, while accommodating
144        # arbitrary MemoryRegion access sizes and alignments.
145
146        # This algorithm will fail under some rare situations.
147        # e.g., xxxxxxxxxuxxxxxx (u is the unnecessary byte)
148
149        if newtrace[i].startswith("write "):
150            addr = int(newtrace[i].split()[1], 16)
151            length = int(newtrace[i].split()[2], 16)
152            data = newtrace[i].split()[3][2:]
153            if length > 1:
154                leftlength = int(length/2)
155                rightlength = length - leftlength
156                newtrace.insert(i+1, "")
157                power = 1
158                while leftlength > 0:
159                    newtrace[i] = "write {addr} {size} 0x{data}\n".format(
160                            addr=hex(addr),
161                            size=hex(leftlength),
162                            data=data[:leftlength*2])
163                    newtrace[i+1] = "write {addr} {size} 0x{data}\n".format(
164                            addr=hex(addr+leftlength),
165                            size=hex(rightlength),
166                            data=data[leftlength*2:])
167                    if check_if_trace_crashes(newtrace, outpath):
168                        break
169                    # move the pivot to right side
170                    if leftlength < rightlength:
171                        rightlength, leftlength = leftlength, rightlength
172                        continue
173                    power += 1
174                    leftlength = int(length/pow(2, power))
175                    rightlength = length - leftlength
176                if check_if_trace_crashes(newtrace, outpath):
177                    i -= 1
178                else:
179                    newtrace[i] = prior[0]
180                    del newtrace[i+1]
181        i += 1
182
183
184def clear_bits(newtrace, outpath):
185    # try setting bits in operands of out/write to zero
186    i = 0
187    while i < len(newtrace):
188        if (not newtrace[i].startswith("write ") and not
189           newtrace[i].startswith("out")):
190           i += 1
191           continue
192        # write ADDR SIZE DATA
193        # outx ADDR VALUE
194        print("\nzero setting bits: {}".format(newtrace[i]))
195
196        prefix = " ".join(newtrace[i].split()[:-1])
197        data = newtrace[i].split()[-1]
198        data_bin = bin(int(data, 16))
199        data_bin_list = list(data_bin)
200
201        for j in range(2, len(data_bin_list)):
202            prior = newtrace[i]
203            if (data_bin_list[j] == '1'):
204                data_bin_list[j] = '0'
205                data_try = hex(int("".join(data_bin_list), 2))
206                # It seems qtest only accepts padded hex-values.
207                if len(data_try) % 2 == 1:
208                    data_try = data_try[:2] + "0" + data_try[2:-1]
209
210                newtrace[i] = "{prefix} {data_try}\n".format(
211                        prefix=prefix,
212                        data_try=data_try)
213
214                if not check_if_trace_crashes(newtrace, outpath):
215                    data_bin_list[j] = '1'
216                    newtrace[i] = prior
217        i += 1
218
219
220def minimize_trace(inpath, outpath):
221    global TIMEOUT
222    with open(inpath) as f:
223        trace = f.readlines()
224    start = time.time()
225    if not check_if_trace_crashes(trace, outpath):
226        sys.exit("The input qtest trace didn't cause a crash...")
227    end = time.time()
228    print("Crashed in {} seconds".format(end-start))
229    TIMEOUT = (end-start)*5
230    print("Setting the timeout for {} seconds".format(TIMEOUT))
231
232    newtrace = trace[:]
233    global M1, M2
234
235    # remove lines
236    old_len = len(newtrace) + 1
237    while(old_len > len(newtrace)):
238        old_len = len(newtrace)
239        print("trace lenth = ", old_len)
240        remove_lines(newtrace, outpath)
241        if not M1 and not M2:
242            break
243        newtrace = list(filter(lambda s: s != "", newtrace))
244    assert(check_if_trace_crashes(newtrace, outpath))
245
246    # set bits to zero
247    if M2:
248        clear_bits(newtrace, outpath)
249    assert(check_if_trace_crashes(newtrace, outpath))
250
251
252if __name__ == '__main__':
253    if len(sys.argv) < 3:
254        usage()
255    if "-M1" in sys.argv:
256        M1 = True
257    if "-M2" in sys.argv:
258        M2 = True
259    QEMU_PATH = os.getenv("QEMU_PATH")
260    QEMU_ARGS = os.getenv("QEMU_ARGS")
261    if QEMU_PATH is None or QEMU_ARGS is None:
262        usage()
263    # if "accel" not in QEMU_ARGS:
264    #     QEMU_ARGS += " -accel qtest"
265    CRASH_TOKEN = os.getenv("CRASH_TOKEN")
266    QEMU_ARGS += " -qtest stdio -monitor none -serial none "
267    minimize_trace(sys.argv[-2], sys.argv[-1])
268