xref: /openbmc/u-boot/tools/patman/patchstream.py (revision c7ba7bdc9d9940313ff5a63644ae3d74c77636cc)
1# Copyright (c) 2011 The Chromium OS Authors.
2#
3# SPDX-License-Identifier:	GPL-2.0+
4#
5
6import math
7import os
8import re
9import shutil
10import tempfile
11
12import command
13import commit
14import gitutil
15from series import Series
16
# All patterns below are applied with .match(), so each one is tested at the
# start of the line whether or not it carries an explicit '^'. Raw strings
# are used so that regex escapes such as \w are not treated as (invalid)
# Python string escapes.

# Tags that we detect and remove
re_remove = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
    r'|Reviewed-on:|Commit-\w*:')

# Lines which are allowed after a TEST= line
re_allowed_after_test = re.compile(r'^Signed-off-by:')

# Signoffs
re_signoff = re.compile(r'^Signed-off-by: *(.*)')

# The start of the cover letter
re_cover = re.compile(r'^Cover-letter:')

# A cover letter Cc
re_cover_cc = re.compile(r'^Cover-letter-cc: *(.*)')

# Patch series tag: group 1 is the tag name, group 2 the value
re_series_tag = re.compile(r'^Series-([a-z-]*): *(.*)')

# Commit series tag: group 1 is the tag name, group 2 the value
re_commit_tag = re.compile(r'^Commit-([a-z-]*): *(.*)')

# Commit tags that we want to collect and keep
re_tag = re.compile(r'^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)')

# The start of a new commit in the git log
re_commit = re.compile(r'^commit ([0-9a-f]*)$')

# We detect these since checkpatch doesn't always do it
re_space_before_tab = re.compile(r'^[+].* \t')

# States we can be in - can we use range() and still have comments?
# The values are consecutive because the parser advances with 'state += 1'.
STATE_MSG_HEADER = 0        # Still in the message header
STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
STATE_PATCH_HEADER = 2      # In patch header (after the subject)
STATE_DIFFS = 3             # In the diff part (past --- line)
53
class PatchStream:
    """Class for detecting/injecting tags in a patch or series of patches

    We support processing the output of 'git log' to read out the tags we
    are interested in. We can also process a patch file in order to remove
    unwanted tags or inject additional ones. These correspond to the two
    phases of processing.
    """
    def __init__(self, series, name=None, is_log=False):
        """Set up the initial parser state

        Args:
            series: Series object in which tags, commits and changes are
                recorded
            name: Not used by this class; kept so that callers which pass
                it keep working
            is_log: True if the input is 'git log' output (message lines
                indented by 4 characters), False if it is a patch file
        """
        self.skip_blank = False          # True to skip a single blank line
        self.found_test = False          # Found a TEST= line
        self.lines_after_test = 0        # Number of lines found after TEST=
        self.warn = []                   # List of warnings we have collected
        self.linenum = 1                 # Output line number we are up to
        self.in_section = None           # Name of start...END section we are in
        self.notes = []                  # Series notes
        self.section = []                # The current section...END section
        self.series = series             # Info about the patch series
        self.is_log = is_log             # True if indent like git log
        self.in_change = 0               # Non-zero if we are in a change list
        self.blank_count = 0             # Number of blank lines stored up
        self.state = STATE_MSG_HEADER    # What state are we in?
        self.signoff = []                # Contents of signoff line
        self.commit = None               # Current commit

    def AddToSeries(self, line, name, value):
        """Add a new Series-xxx tag.

        When a Series-xxx tag is detected, we come here to record it, if we
        are scanning a 'git log'. A 'notes' tag additionally opens a
        section which is collected until an END line is seen.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Series-')
            value: Tag value (part after 'Series-xxx: ')
        """
        if name == 'notes':
            self.in_section = name
            self.skip_blank = False
        if self.is_log:
            self.series.AddTag(self.commit, line, name, value)

    def AddToCommit(self, line, name, value):
        """Add a new Commit-xxx tag.

        When a Commit-xxx tag is detected, we come here to record it. Only
        'notes' has any effect: it opens a 'commit-notes' section which is
        collected until an END line is seen.

        Args:
            line: Source line containing tag (currently unused)
            name: Tag name (part after 'Commit-')
            value: Tag value (part after 'Commit-xxx: ', currently unused)
        """
        if name == 'notes':
            self.in_section = 'commit-' + name
            self.skip_blank = False

    def CloseCommit(self):
        """Save the current commit into our commit list, and reset our state

        This does nothing unless we are in 'git log' mode and a commit is
        in progress.
        """
        if self.commit and self.is_log:
            self.series.AddCommit(self.commit)
            self.commit = None

    @staticmethod
    def _IsFromCurrentUser(value):
        """Check whether a tag value contains the current user's address

        Args:
            value: Tag value, e.g. 'Fred Bloggs <fred@example.com>'

        Returns:
            True if the USER environment variable is set and non-empty and
            the text USER followed by '@' appears in value, else False
        """
        user = os.getenv('USER')

        # An unset USER cannot be concatenated with '@', and an empty one
        # would make every address match, so neither counts as a match
        return bool(user) and (user + '@') in value

    def ProcessLine(self, line):
        """Process a single line of a patch file or commit log

        This process a line and returns a list of lines to output. The list
        may be empty or may contain multiple output lines.

        This is where all the complicated logic is located. The class's
        state is used to move between different states and detect things
        properly.

        We can be in one of two modes:
            self.is_log == True: This is 'git log' mode, where most output is
                indented by 4 characters and we are scanning for tags

            self.is_log == False: This is 'patch' mode, where we already have
                all the tags, and are processing patches to remove junk we
                don't want, and add things we think are required.

        Args:
            line: text line to process

        Returns:
            list of output lines, or [] if nothing should be output

        Raises:
            ValueError: if a 'Series-changes:' tag has a version which is
                not an integer
        """
        # Initially we have no output. Prepare the input line string
        out = []
        line = line.rstrip('\n')

        # This is checked before the indent is removed, so only a
        # 'commit <hash>' line which starts in column 0 matches
        commit_match = re_commit.match(line) if self.is_log else None

        if self.is_log:
            if line[:4] == '    ':
                line = line[4:]

        # Handle state transition and skipping blank lines
        series_tag_match = re_series_tag.match(line)
        commit_tag_match = re_commit_tag.match(line)
        cover_cc_match = re_cover_cc.match(line)
        signoff_match = re_signoff.match(line)
        tag_match = None
        if self.state == STATE_PATCH_HEADER:
            tag_match = re_tag.match(line)
        is_blank = not line.strip()
        if is_blank:
            if (self.state == STATE_MSG_HEADER
                    or self.state == STATE_PATCH_SUBJECT):
                self.state += 1

            # We don't have a subject in the text stream of patch files
            # It has its own line with a Subject: tag
            if not self.is_log and self.state == STATE_PATCH_SUBJECT:
                self.state += 1
        elif commit_match:
            self.state = STATE_MSG_HEADER

        # If we are in a section, keep collecting lines until we see END
        if self.in_section:
            if line == 'END':
                if self.in_section == 'cover':
                    self.series.cover = self.section
                elif self.in_section == 'notes':
                    if self.is_log:
                        self.series.notes += self.section
                elif self.in_section == 'commit-notes':
                    if self.is_log:
                        self.commit.notes += self.section
                else:
                    self.warn.append("Unknown section '%s'" % self.in_section)
                self.in_section = None
                self.skip_blank = True
                self.section = []
            else:
                self.section.append(line)

        # Detect the commit subject
        elif not is_blank and self.state == STATE_PATCH_SUBJECT:
            self.commit.subject = line

        # Detect the tags we want to remove, and skip blank lines
        elif re_remove.match(line) and not commit_tag_match:
            self.skip_blank = True

            # TEST= should be the last thing in the commit, so remove
            # everything after it
            if line.startswith('TEST='):
                self.found_test = True
        elif self.skip_blank and is_blank:
            self.skip_blank = False

        # Detect the start of a cover letter section
        elif re_cover.match(line):
            self.in_section = 'cover'
            self.skip_blank = False

        elif cover_cc_match:
            value = cover_cc_match.group(1)
            self.AddToSeries(line, 'cover-cc', value)

        # If we are in a change list, keep collecting lines until a blank one
        elif self.in_change:
            if is_blank:
                # Blank line ends this change list
                self.in_change = 0
            elif line == '---':
                # The change list ran straight into the diff separator: end
                # the list, then handle the '---' line again as normal
                self.in_change = 0
                out = self.ProcessLine(line)
            else:
                if self.is_log:
                    self.series.AddChange(self.in_change, self.commit, line)
            self.skip_blank = False

        # Detect Series-xxx tags
        elif series_tag_match:
            name = series_tag_match.group(1)
            value = series_tag_match.group(2)
            if name == 'changes':
                # value is the version number: e.g. 1, or 2
                try:
                    value = int(value)
                except ValueError:
                    raise ValueError("%s: Cannot decode version info '%s'" %
                        (self.commit.hash, line))
                self.in_change = value
            else:
                self.AddToSeries(line, name, value)
                self.skip_blank = True

        # Detect Commit-xxx tags
        elif commit_tag_match:
            name = commit_tag_match.group(1)
            value = commit_tag_match.group(2)
            if name == 'notes':
                self.AddToCommit(line, name, value)
                self.skip_blank = True

        # Detect the start of a new commit
        elif commit_match:
            self.CloseCommit()
            self.commit = commit.Commit(commit_match.group(1))

        # Detect tags in the commit message
        elif tag_match:
            # Remove Tested-by self, since few will take much notice
            if (tag_match.group(1) == 'Tested-by' and
                    self._IsFromCurrentUser(tag_match.group(2))):
                self.warn.append("Ignoring %s" % line)
            elif tag_match.group(1) == 'Patch-cc':
                self.commit.AddCc(tag_match.group(2).split(','))
            else:
                out = [line]

        # Suppress duplicate signoffs
        elif signoff_match:
            if (self.is_log or not self.commit or
                self.commit.CheckDuplicateSignoff(signoff_match.group(1))):
                out = [line]

        # Well that means this is an ordinary line
        else:
            # Look for ugly ASCII characters
            for pos, ch in enumerate(line, 1):
                # TODO: Would be nicer to report source filename and line
                if ord(ch) > 0x80:
                    self.warn.append("Line %d/%d ('%s') has funny ascii char" %
                        (self.linenum, pos, line))

            # Look for space before tab
            m = re_space_before_tab.match(line)
            if m:
                # NOTE(review): the pattern is anchored at the start of the
                # line, so m.start() is always 0 here - confirm whether the
                # column of the offending space was intended instead
                self.warn.append('Line %d/%d has space before tab' %
                    (self.linenum, m.start()))

            # OK, we have a valid non-blank line
            out = [line]
            self.linenum += 1
            self.skip_blank = False
            if self.state == STATE_DIFFS:
                pass

            # If this is the start of the diffs section, emit our tags and
            # change log
            elif line == '---':
                self.state = STATE_DIFFS

                # Output the '---' line, then commit notes, then change list
                out = []
                log = self.series.MakeChangeLog(self.commit)
                out += [line]
                if self.commit:
                    out += self.commit.notes
                out += [''] + log
            elif self.found_test:
                if not re_allowed_after_test.match(line):
                    self.lines_after_test += 1

        return out

    def Finalize(self):
        """Close out processing of this patch stream

        This saves any commit still in progress and adds a warning if any
        disallowed lines were seen after a TEST= line.
        """
        self.CloseCommit()
        if self.lines_after_test:
            self.warn.append('Found %d lines after TEST=' %
                    self.lines_after_test)

    def ProcessStream(self, infd, outfd):
        """Copy a stream from infd to outfd, filtering out unwanted things.

        This is used to process patch files one at a time. Each input line
        is passed through ProcessLine() and the resulting lines are written
        to outfd, then Finalize() is called.

        Args:
            infd: Input stream file object
            outfd: Output stream file object
        """
        # Extract the filename from each diff, for nice warnings
        fname = None
        last_fname = None
        re_fname = re.compile('diff --git a/(.*) b/.*')
        while True:
            line = infd.readline()
            if not line:
                break
            out = self.ProcessLine(line)

            # Try to detect blank lines at EOF
            for out_line in out:
                match = re_fname.match(out_line)
                if match:
                    last_fname = fname
                    fname = match.group(1)
                if out_line == '+':
                    # Hold back added blank lines until we see what follows
                    self.blank_count += 1
                else:
                    if self.blank_count and (out_line == '-- ' or match):
                        # At a new diff header the blank lines belong to the
                        # previous file; at the '-- ' signature they belong
                        # to the file we are still in
                        culprit = last_fname if match else fname
                        self.warn.append("Found possible blank line(s) at "
                                "end of file '%s'" % culprit)
                    outfd.write('+\n' * self.blank_count)
                    outfd.write(out_line + '\n')
                    self.blank_count = 0
        self.Finalize()
357
358
def GetMetaDataForList(commit_range, git_dir=None, count=None,
                       series=None, allow_overwrite=False):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and pulls out the tags we
    are interested in.

    Args:
        commit_range: Range of commits to count (e.g. 'HEAD..base')
        git_dir: Path to git repository (None to use default)
        count: Number of commits to list, or None for no limit
        series: Series object to add information into. By default a new series
            is started.
        allow_overwrite: Allow tags to overwrite an existing tag
    Returns:
        A Series object containing information about the commits.
    """
    # Compare against None rather than testing truthiness, so that a
    # caller-supplied series which happens to evaluate as false (e.g. an
    # empty container-like object) is still the one that gets filled in
    if series is None:
        series = Series()
    series.allow_overwrite = allow_overwrite
    params = gitutil.LogCmd(commit_range, reverse=True, count=count,
                            git_dir=git_dir)
    stdout = command.RunPipe([params], capture=True).stdout

    # Feed the log through the parser in 'git log' mode; the parser records
    # what it finds in 'series' as it goes
    ps = PatchStream(series, is_log=True)
    for line in stdout.splitlines():
        ps.ProcessLine(line)
    ps.Finalize()
    return series
387
def GetMetaData(start, count):
    """Collect patch series metadata for commits relative to HEAD

    This is a convenience wrapper around GetMetaDataForList() which builds
    the commit range from an offset and uses the default git directory.

    Args:
        start: Commit to start from: 0=HEAD, 1=next one, etc.
        count: Number of commits to list

    Returns:
        Whatever GetMetaDataForList() returns for the range 'HEAD~<start>'
    """
    commit_range = 'HEAD~%d' % start
    git_dir = None
    return GetMetaDataForList(commit_range, git_dir, count)
399
def FixPatch(backup_dir, fname, series, commit):
    """Fix up a patch file, by adding/removing as required.

    We remove our tags from the patch file, insert changes lists, etc.
    The patch file is processed in place, and overwritten.

    A backup file is put into backup_dir (if not None).

    Args:
        backup_dir: Directory to copy the original patch file into before
            it is overwritten, or None for no backup
        fname: Filename to patch file to process
        series: Series information about this patch set
        commit: Commit object for this patch file
    Return:
        A list of warnings, or [] if all ok.
    """
    handle, tmpname = tempfile.mkstemp()
    try:
        # The context managers make sure both files are closed even if
        # processing fails part-way through
        with os.fdopen(handle, 'w') as outfd:
            with open(fname, 'r') as infd:
                ps = PatchStream(series)
                ps.commit = commit
                ps.ProcessStream(infd, outfd)
    except BaseException:
        # Don't leave a stray temporary file behind; the original patch
        # file has not been touched at this point
        os.remove(tmpname)
        raise

    # Create a backup file if required
    if backup_dir:
        shutil.copy(fname, os.path.join(backup_dir, os.path.basename(fname)))
    shutil.move(tmpname, fname)
    return ps.warn
429
def FixPatches(series, fnames):
    """Fix up a list of patches identified by filenames

    The patch files are processed in place, and overwritten. Each file is
    paired, by position, with the entry at the same index in
    series.commits. Any warnings produced for a file are printed, followed
    by a summary of how many patches were processed.

    Args:
        series: The series object
        fnames: List of patch files to process

    Returns:
        The series object which was passed in
    """
    # Current workflow creates patches, so we shouldn't need a backup
    backup_dir = None  #tempfile.mkdtemp('clean-patch')
    count = 0
    for index, fname in enumerate(fnames):
        commit = series.commits[index]
        commit.patch = fname
        result = FixPatch(backup_dir, fname, series, commit)
        if result:
            # Each print() gets exactly one argument, so the output is the
            # same whether print is a statement or a function
            print('%d warnings for %s:' % (len(result), fname))
            for warn in result:
                print('\t%s' % (warn,))
            print('')
        count = index + 1
    print('Cleaned %d patches' % count)
    return series
454
def InsertCoverLetter(fname, series, count):
    """Inserts a cover letter with the required info into patch 0

    The file is read in full, the 'Subject:' line and the
    '*** BLURB HERE ***' placeholder are replaced, and the result is
    written back to the same file. The file is only opened for writing once
    the new contents have been built, so a failure while building them
    leaves the original file intact.

    Args:
        fname: Input / output filename of the cover letter file
        series: Series object
        count: Number of patches in the series
    """
    with open(fname, 'r') as fd:
        lines = fd.readlines()

    text = series.cover
    prefix = series.GetPatchPrefix()
    out_lines = []
    for line in lines:
        if line.startswith('Subject:'):
            # if more than 10 or 100 patches, it should say 00/xx, 000/xxx, etc
            # i.e. one zero for each decimal digit of count
            zero = '0' * len(str(count))
            line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0])

        # Insert our cover letter
        elif line.startswith('*** BLURB HERE ***'):
            # First the blurb text
            line = '\n'.join(text[1:]) + '\n'
            if series.get('notes'):
                line += '\n'.join(series.notes) + '\n'

            # Now the change list
            changes = series.MakeChangeLog(None)
            line += '\n' + '\n'.join(changes)
        out_lines.append(line)

    with open(fname, 'w') as fd:
        fd.writelines(out_lines)
489