1#! /usr/bin/env python3
2#
3# Copyright OpenEmbedded Contributors
4#
5# SPDX-License-Identifier: GPL-2.0-only
6#
7
8# TODO
9# - option to just list all broken files
10# - test suite
11# - validate signed-off-by
12
# Recognised Upstream-Status values, all lower case: patchreview() lower-cases
# the value it finds in a patch before looking it up in this tuple.
status_values = ("accepted", "pending", "inappropriate", "backport", "submitted", "denied", "inactive-upstream")
14
class Result:
    """
    What the scan found for one patch.  Every field is a class-level default
    meaning "nothing found"; instances only override what applies to them.
    """

    # -- Signed-off-by --
    # The tag value, when one was found
    sob = None
    # No Signed-off-by tag in the patch
    missing_sob = False
    # Set when the Signed-off-by tag is malformed in some way
    malformed_sob = False

    # -- Upstream-Status --
    # The status value (Pending, etc), when a tag was found
    upstream_status = None
    # No Upstream-Status tag in the patch
    missing_upstream_status = False
    # The offending text when the tag itself is malformed, otherwise None
    malformed_upstream_status = None
    # The status value is not a known one (boolean)
    unknown_upstream_status = False

    # -- CVE --
    # The patch looks like a CVE fix but carries no CVE tag
    missing_cve = False
32
def blame_patch(patch):
    """
    From a patch filename, return a list of "commit summary (author name <author
    email>)" strings representing the history.

    git is run from the directory containing the patch (via "git -C") rather
    than from the current working directory, so the lookup also works when the
    tool is started from outside the repository that holds the patch.

    Raises subprocess.CalledProcessError if git exits with an error.
    """
    import os.path
    import subprocess

    directory, filename = os.path.split(patch)
    output = subprocess.check_output(("git", "-C", directory or ".", "log",
                                      "--follow", "--find-renames", "--diff-filter=A",
                                      "--format=%s (%aN <%aE>)",
                                      "--", filename))
    # Be tolerant of author names that are not valid UTF-8: this is only
    # informational output and should not abort the whole report.
    return output.decode("utf-8", errors="replace").splitlines()
43
def patchreview(path, patches):
    """
    Scan patch files for the tags expected in their headers.

    path: directory that the names in patches are relative to
    patches: iterable of patch filenames

    Returns a dictionary mapping each patch's full path (path joined with the
    patch name) to a Result describing its Signed-off-by, Upstream-Status and
    CVE tags.
    """
    import re
    import os.path

    # General pattern: start of line, optional whitespace, tag with optional
    # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
    # insensitive.
    sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
    status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
    cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
    cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)

    results = {}

    for patch in patches:

        fullpath = os.path.join(path, patch)
        result = Result()
        results[fullpath] = result

        # Bytes that are not ASCII are dropped instead of raising; the context
        # manager makes sure the file is closed before moving to the next patch.
        with open(fullpath, encoding='ascii', errors='ignore') as f:
            content = f.read()

        # Find the Signed-off-by tag
        match = sob_re.search(content)
        if match:
            value = match.group(1)
            # Anything but the exact spelling is recorded as written
            if value != "Signed-off-by:":
                result.malformed_sob = value
            result.sob = match.group(2)
        else:
            result.missing_sob = True

        # Find the Upstream-Status tag
        match = status_re.search(content)
        if match:
            value = match.group(1)
            if value != "Upstream-Status:":
                result.malformed_upstream_status = value

            value = match.group(2).lower()
            # TODO: check case
            if value not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = value
        else:
            result.missing_upstream_status = True

        # Check that patches which looks like CVEs have CVE tags
        if cve_re.search(patch) or cve_re.search(content):
            if not cve_tag_re.search(content):
                result.missing_cve = True
        # TODO: extract CVE list

    return results
98
99
def analyse(results, want_blame=False, verbose=True):
    """
    Print the per-patch warnings (when verbose) followed by summary statistics.

    results: dictionary mapping patch path to its Result
    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary

    Nothing is returned; all output goes to stdout.
    """

    # want_blame requires verbose, so disable blame if we're not verbose
    if want_blame and not verbose:
        want_blame = False

    total_patches = 0
    missing_sob = 0
    malformed_sob = 0
    missing_status = 0
    malformed_status = 0
    missing_cve = 0
    pending_patches = 0

    for patch in sorted(results):
        r = results[patch]
        total_patches += 1

        bad_status = bool(r.malformed_upstream_status or r.unknown_upstream_status)

        # Build statistics
        if r.missing_sob:
            missing_sob += 1
        if r.malformed_sob:
            malformed_sob += 1
        if r.missing_upstream_status:
            missing_status += 1
        if bad_status:
            malformed_status += 1
        if r.missing_cve:
            missing_cve += 1
        # A patch whose status is malformed or unknown is treated as pending.
        # Count each patch once only: a misspelt tag whose value is "pending"
        # would otherwise be counted twice and push the total past 100%.
        if bad_status or r.upstream_status == "pending":
            pending_patches += 1

        # Collect warnings, in the order they are reported
        warnings = []
        if r.missing_sob:
            warnings.append("Missing Signed-off-by tag (%s)" % patch)
        if r.malformed_sob:
            warnings.append("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
        if r.missing_cve:
            warnings.append("Missing CVE tag (%s)" % patch)
        if r.missing_upstream_status:
            warnings.append("Missing Upstream-Status tag (%s)" % patch)
        if r.malformed_upstream_status:
            warnings.append("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
        if r.unknown_upstream_status:
            warnings.append("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))

        if verbose:
            for warning in warnings:
                print(warning)

        # Any warning at all makes the patch worth blaming
        if want_blame and warnings:
            print("\n".join(blame_patch(patch)) + "\n")

    def percent(num):
        # "count (share of all patches)", or N/A when there are no patches
        try:
            return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
        except ZeroDivisionError:
            return "N/A"

    if verbose:
        print()

    print("""Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % (total_patches,
                                   percent(missing_sob),
                                   percent(malformed_sob),
                                   percent(missing_cve),
                                   percent(missing_status),
                                   percent(malformed_status),
                                   percent(pending_patches)))
190
191
192
def histogram(results):
    """
    Print a text histogram of Upstream-Status values.

    results: dictionary whose values each have an upstream_status attribute

    One line is printed per distinct status, in the order each status is first
    seen: the capitalised status ("No status" when it is empty or None), a bar
    of one "#" per percent of all patches (rounded up), and the raw count.
    """
    # Standard library only, so this option works without extra packages
    import collections
    import math

    counts = collections.Counter(r.upstream_status for r in results.values())
    for status, count in counts.items():
        bar = "#" * int(math.ceil(float(count) / len(results) * 100))
        print("%-20s %s (%d)" % (status.capitalize() if status else "No status", bar, count))
200
201
# Command-line entry point: scan a layer's patches, print the report and
# optionally append a statistics row to a JSON file and/or show a histogram.
if __name__ == "__main__":
    import argparse, subprocess, os

    parser = argparse.ArgumentParser(description="Patch Review Tool")
    parser.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    parser.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    parser.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    parser.add_argument("-j", "--json", help="update JSON")
    parser.add_argument("directory", help="directory to scan")
    args = parser.parse_args()

    # -z makes git emit NUL-terminated, unquoted names, so paths that contain
    # whitespace are kept intact instead of being split into pieces.
    listing = subprocess.check_output(("git", "-C", args.directory, "ls-files", "-z", "recipes-*/**/*.patch", "recipes-*/**/*.diff")).decode("utf-8")
    patches = [name for name in listing.split("\0") if name]
    results = patchreview(args.directory, patches)
    analyse(results, want_blame=args.blame, verbose=args.verbose)

    if args.json:
        import json, os.path, collections
        # Append to the existing history if there is one, otherwise start afresh
        if os.path.isfile(args.json):
            with open(args.json) as f:
                data = json.load(f)
        else:
            data = []

        # Counter so that statuses which never occur simply stay absent
        row = collections.Counter()
        row["total"] = len(results)
        row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
        row["commit"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%H"]).decode("utf-8").strip()
        for r in results.values():
            if r.upstream_status in status_values:
                row[r.upstream_status] += 1
            if r.malformed_upstream_status or r.missing_upstream_status:
                row['malformed-upstream-status'] += 1
            if r.malformed_sob or r.missing_sob:
                row['malformed-sob'] += 1

        data.append(row)
        # Close the file explicitly so the JSON is fully written out
        with open(args.json, "w") as f:
            json.dump(data, f, sort_keys=True, indent="\t")

    if args.histogram:
        print()
        histogram(results)
242