xref: /openbmc/u-boot/tools/patman/patchstream.py (revision 3765b3e7)
1# Copyright (c) 2011 The Chromium OS Authors.
2#
3# SPDX-License-Identifier:	GPL-2.0+
4#
5
6import os
7import re
8import shutil
9import tempfile
10
11import command
12import commit
13import gitutil
14from series import Series
15
# Tags that we detect and remove
# (raw strings, so that '\w' reaches the regex engine as written instead of
# being treated as an invalid string escape)
re_remove = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
    r'|Reviewed-on:|Commit-\w*:')

# Lines which are allowed after a TEST= line
re_allowed_after_test = re.compile('^Signed-off-by:')

# Signoffs
re_signoff = re.compile('^Signed-off-by:')

# The start of the cover letter
re_cover = re.compile('^Cover-letter:')

# A cover letter Cc
re_cover_cc = re.compile('^Cover-letter-cc: *(.*)')

# Patch series tag: group 1 is the part after 'Series-', group 2 the value
re_series = re.compile('^Series-([a-z-]*): *(.*)')

# Commit tags that we want to collect and keep
re_tag = re.compile('^(Tested-by|Acked-by|Reviewed-by|Cc): (.*)')

# The start of a new commit in the git log
re_commit = re.compile('^commit ([0-9a-f]*)$')

# We detect these since checkpatch doesn't always do it
re_space_before_tab = re.compile('^[+].* \t')

# States we can be in. The values are consecutive because the parser moves
# from one state to the next with 'state += 1'.
STATE_MSG_HEADER = 0        # Still in the message header
STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
STATE_PATCH_HEADER = 2      # In patch header (after the subject)
STATE_DIFFS = 3             # In the diff part (past --- line)
49
50class PatchStream:
51    """Class for detecting/injecting tags in a patch or series of patches
52
53    We support processing the output of 'git log' to read out the tags we
54    are interested in. We can also process a patch file in order to remove
55    unwanted tags or inject additional ones. These correspond to the two
56    phases of processing.
57    """
58    def __init__(self, series, name=None, is_log=False):
59        self.skip_blank = False          # True to skip a single blank line
60        self.found_test = False          # Found a TEST= line
61        self.lines_after_test = 0        # MNumber of lines found after TEST=
62        self.warn = []                   # List of warnings we have collected
63        self.linenum = 1                 # Output line number we are up to
64        self.in_section = None           # Name of start...END section we are in
65        self.notes = []                  # Series notes
66        self.section = []                # The current section...END section
67        self.series = series             # Info about the patch series
68        self.is_log = is_log             # True if indent like git log
69        self.in_change = 0               # Non-zero if we are in a change list
70        self.blank_count = 0             # Number of blank lines stored up
71        self.state = STATE_MSG_HEADER    # What state are we in?
72        self.tags = []                   # Tags collected, like Tested-by...
73        self.signoff = []                # Contents of signoff line
74        self.commit = None               # Current commit
75
76    def AddToSeries(self, line, name, value):
77        """Add a new Series-xxx tag.
78
79        When a Series-xxx tag is detected, we come here to record it, if we
80        are scanning a 'git log'.
81
82        Args:
83            line: Source line containing tag (useful for debug/error messages)
84            name: Tag name (part after 'Series-')
85            value: Tag value (part after 'Series-xxx: ')
86        """
87        if name == 'notes':
88            self.in_section = name
89            self.skip_blank = False
90        if self.is_log:
91            self.series.AddTag(self.commit, line, name, value)
92
93    def CloseCommit(self):
94        """Save the current commit into our commit list, and reset our state"""
95        if self.commit and self.is_log:
96            self.series.AddCommit(self.commit)
97            self.commit = None
98
99    def FormatTags(self, tags):
100        out_list = []
101        for tag in sorted(tags):
102            if tag.startswith('Cc:'):
103                tag_list = tag[4:].split(',')
104                out_list += gitutil.BuildEmailList(tag_list, 'Cc:')
105            else:
106                out_list.append(tag)
107        return out_list
108
109    def ProcessLine(self, line):
110        """Process a single line of a patch file or commit log
111
112        This process a line and returns a list of lines to output. The list
113        may be empty or may contain multiple output lines.
114
115        This is where all the complicated logic is located. The class's
116        state is used to move between different states and detect things
117        properly.
118
119        We can be in one of two modes:
120            self.is_log == True: This is 'git log' mode, where most output is
121                indented by 4 characters and we are scanning for tags
122
123            self.is_log == False: This is 'patch' mode, where we already have
124                all the tags, and are processing patches to remove junk we
125                don't want, and add things we think are required.
126
127        Args:
128            line: text line to process
129
130        Returns:
131            list of output lines, or [] if nothing should be output
132        """
133        # Initially we have no output. Prepare the input line string
134        out = []
135        line = line.rstrip('\n')
136        if self.is_log:
137            if line[:4] == '    ':
138                line = line[4:]
139
140        # Handle state transition and skipping blank lines
141        series_match = re_series.match(line)
142        commit_match = re_commit.match(line) if self.is_log else None
143        cover_cc_match = re_cover_cc.match(line)
144        tag_match = None
145        if self.state == STATE_PATCH_HEADER:
146            tag_match = re_tag.match(line)
147        is_blank = not line.strip()
148        if is_blank:
149            if (self.state == STATE_MSG_HEADER
150                    or self.state == STATE_PATCH_SUBJECT):
151                self.state += 1
152
153            # We don't have a subject in the text stream of patch files
154            # It has its own line with a Subject: tag
155            if not self.is_log and self.state == STATE_PATCH_SUBJECT:
156                self.state += 1
157        elif commit_match:
158            self.state = STATE_MSG_HEADER
159
160        # If we are in a section, keep collecting lines until we see END
161        if self.in_section:
162            if line == 'END':
163                if self.in_section == 'cover':
164                    self.series.cover = self.section
165                elif self.in_section == 'notes':
166                    if self.is_log:
167                        self.series.notes += self.section
168                else:
169                    self.warn.append("Unknown section '%s'" % self.in_section)
170                self.in_section = None
171                self.skip_blank = True
172                self.section = []
173            else:
174                self.section.append(line)
175
176        # Detect the commit subject
177        elif not is_blank and self.state == STATE_PATCH_SUBJECT:
178            self.commit.subject = line
179
180        # Detect the tags we want to remove, and skip blank lines
181        elif re_remove.match(line):
182            self.skip_blank = True
183
184            # TEST= should be the last thing in the commit, so remove
185            # everything after it
186            if line.startswith('TEST='):
187                self.found_test = True
188        elif self.skip_blank and is_blank:
189            self.skip_blank = False
190
191        # Detect the start of a cover letter section
192        elif re_cover.match(line):
193            self.in_section = 'cover'
194            self.skip_blank = False
195
196        elif cover_cc_match:
197            value = cover_cc_match.group(1)
198            self.AddToSeries(line, 'cover-cc', value)
199
200        # If we are in a change list, key collected lines until a blank one
201        elif self.in_change:
202            if is_blank:
203                # Blank line ends this change list
204                self.in_change = 0
205            elif line == '---' or re_signoff.match(line):
206                self.in_change = 0
207                out = self.ProcessLine(line)
208            else:
209                if self.is_log:
210                    self.series.AddChange(self.in_change, self.commit, line)
211            self.skip_blank = False
212
213        # Detect Series-xxx tags
214        elif series_match:
215            name = series_match.group(1)
216            value = series_match.group(2)
217            if name == 'changes':
218                # value is the version number: e.g. 1, or 2
219                try:
220                    value = int(value)
221                except ValueError as str:
222                    raise ValueError("%s: Cannot decode version info '%s'" %
223                        (self.commit.hash, line))
224                self.in_change = int(value)
225            else:
226                self.AddToSeries(line, name, value)
227                self.skip_blank = True
228
229        # Detect the start of a new commit
230        elif commit_match:
231            self.CloseCommit()
232            # TODO: We should store the whole hash, and just display a subset
233            self.commit = commit.Commit(commit_match.group(1)[:8])
234
235        # Detect tags in the commit message
236        elif tag_match:
237            # Remove Tested-by self, since few will take much notice
238            if (tag_match.group(1) == 'Tested-by' and
239                    tag_match.group(2).find(os.getenv('USER') + '@') != -1):
240                self.warn.append("Ignoring %s" % line)
241            elif tag_match.group(1) == 'Cc':
242                self.commit.AddCc(tag_match.group(2).split(','))
243            else:
244                self.tags.append(line);
245
246        # Well that means this is an ordinary line
247        else:
248            pos = 1
249            # Look for ugly ASCII characters
250            for ch in line:
251                # TODO: Would be nicer to report source filename and line
252                if ord(ch) > 0x80:
253                    self.warn.append("Line %d/%d ('%s') has funny ascii char" %
254                        (self.linenum, pos, line))
255                pos += 1
256
257            # Look for space before tab
258            m = re_space_before_tab.match(line)
259            if m:
260                self.warn.append('Line %d/%d has space before tab' %
261                    (self.linenum, m.start()))
262
263            # OK, we have a valid non-blank line
264            out = [line]
265            self.linenum += 1
266            self.skip_blank = False
267            if self.state == STATE_DIFFS:
268                pass
269
270            # If this is the start of the diffs section, emit our tags and
271            # change log
272            elif line == '---':
273                self.state = STATE_DIFFS
274
275                # Output the tags (signeoff first), then change list
276                out = []
277                log = self.series.MakeChangeLog(self.commit)
278                out += self.FormatTags(self.tags)
279                out += [line] + log
280            elif self.found_test:
281                if not re_allowed_after_test.match(line):
282                    self.lines_after_test += 1
283
284        return out
285
286    def Finalize(self):
287        """Close out processing of this patch stream"""
288        self.CloseCommit()
289        if self.lines_after_test:
290            self.warn.append('Found %d lines after TEST=' %
291                    self.lines_after_test)
292
293    def ProcessStream(self, infd, outfd):
294        """Copy a stream from infd to outfd, filtering out unwanting things.
295
296        This is used to process patch files one at a time.
297
298        Args:
299            infd: Input stream file object
300            outfd: Output stream file object
301        """
302        # Extract the filename from each diff, for nice warnings
303        fname = None
304        last_fname = None
305        re_fname = re.compile('diff --git a/(.*) b/.*')
306        while True:
307            line = infd.readline()
308            if not line:
309                break
310            out = self.ProcessLine(line)
311
312            # Try to detect blank lines at EOF
313            for line in out:
314                match = re_fname.match(line)
315                if match:
316                    last_fname = fname
317                    fname = match.group(1)
318                if line == '+':
319                    self.blank_count += 1
320                else:
321                    if self.blank_count and (line == '-- ' or match):
322                        self.warn.append("Found possible blank line(s) at "
323                                "end of file '%s'" % last_fname)
324                    outfd.write('+\n' * self.blank_count)
325                    outfd.write(line + '\n')
326                    self.blank_count = 0
327        self.Finalize()
328
329
def GetMetaDataForList(commit_range, git_dir=None, count=None,
                       series=None):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and pulls out the tags we
    are interested in.

    Args:
        commit_range: Range of commits to count (e.g. 'HEAD..base')
        git_dir: Path to git repository (None to use default)
        count: Number of commits to list, or None for no limit
        series: Series object to add information into. By default a new series
            is started.
    Returns:
        A Series object containing information about the commits.
    """
    # Create the default here rather than in the signature: a default value
    # is evaluated only once, so a Series() there would be shared (and keep
    # growing) across every call which does not pass 'series'
    if series is None:
        series = Series()

    params = ['git', 'log', '--no-color', '--reverse', '--no-decorate',
                    commit_range]
    if count is not None:
        # Goes straight after 'log'
        params[2:2] = ['-n%d' % count]
    if git_dir:
        # Goes straight after 'git', ahead of the 'log' subcommand
        params[1:1] = ['--git-dir', git_dir]
    pipe = [params]
    stdout = command.RunPipe(pipe, capture=True).stdout

    # Feed the log through a PatchStream in log mode; it records what it
    # finds in 'series'
    ps = PatchStream(series, is_log=True)
    for line in stdout.splitlines():
        ps.ProcessLine(line)
    ps.Finalize()
    return series
359
def GetMetaData(start, count):
    """Reads out patch series metadata for commits counted back from HEAD

    This is a convenience wrapper around GetMetaDataForList() which builds
    the commit range from an offset relative to HEAD and uses the default
    git directory.

    Args:
        start: Commit to start from: 0=HEAD, 1=next one, etc.
        count: Number of commits to list
    Returns:
        The value returned by GetMetaDataForList() for that range
    """
    commit_range = 'HEAD~%d' % start
    return GetMetaDataForList(commit_range, git_dir=None, count=count)
371
def FixPatch(backup_dir, fname, series, commit):
    """Fix up a patch file, by adding/removing as required.

    We remove our tags from the patch file, insert changes lists, etc.
    The patch file is processed in place, and overwritten.

    A backup file is put into backup_dir (if not None).

    Args:
        backup_dir: Directory to copy the original patch file into before
            it is overwritten, or None for no backup
        fname: Filename to patch file to process
        series: Series information about this patch set
        commit: Commit object for this patch file
    Return:
        A list of warnings, or [] if all ok.
    """
    handle, tmpname = tempfile.mkstemp()
    try:
        # The processed patch is written to a temporary file, so that the
        # original is left alone if anything goes wrong part-way through
        with os.fdopen(handle, 'w') as outfd:
            with open(fname, 'r') as infd:
                ps = PatchStream(series)
                ps.commit = commit
                ps.ProcessStream(infd, outfd)

        # Create a backup file if required
        if backup_dir:
            shutil.copy(fname,
                        os.path.join(backup_dir, os.path.basename(fname)))
        shutil.move(tmpname, fname)
    finally:
        # After a successful move the temporary file no longer exists; if
        # we failed before that, don't leave it lying around
        if os.path.exists(tmpname):
            os.remove(tmpname)
    return ps.warn
401
def FixPatches(series, fnames):
    """Fix up a list of patches identified by filenames

    The patch files are processed in place, and overwritten. Any warnings
    produced for a patch are printed, followed by a summary of how many
    patches were processed.

    Args:
        series: The series object. series.commits[n] is taken to be the
            commit for fnames[n]
        fnames: List of patch files to process
    Returns:
        The series object passed in
    """
    # Current workflow creates patches, so we shouldn't need a backup
    backup_dir = None  #tempfile.mkdtemp('clean-patch')
    count = 0

    # Each print() below takes a single argument, so that it produces the
    # same output whether it is run as a Python 2 print statement or as the
    # Python 3 print function
    for count, fname in enumerate(fnames, 1):
        commit = series.commits[count - 1]
        commit.patch = fname
        result = FixPatch(backup_dir, fname, series, commit)
        if result:
            print('%d warnings for %s:' % (len(result), fname))
            for warn in result:
                print('\t%s' % warn)
            print('')
    print('Cleaned %d patches' % count)
    return series
426
def InsertCoverLetter(fname, series, count):
    """Inserts a cover letter with the required info into patch 0

    The 'Subject:' line is replaced with one built from the patch prefix,
    the patch count and the first line of the series cover letter. The
    '*** BLURB HERE ***' line is replaced with the rest of the cover
    letter, any series notes, and the change log.

    Args:
        fname: Input / output filename of the cover letter file
        series: Series object
        count: Number of patches in the series
    """
    with open(fname, 'r') as fd:
        lines = fd.readlines()

    # Collect what we need from the series before the file is opened for
    # writing (which truncates it)
    text = series.cover
    prefix = series.GetPatchPrefix()

    # The cover letter is patch number zero, written with as many digits as
    # the patch count: 0/9, 00/10, 000/100, etc.
    zero = '0' * len(str(count))

    with open(fname, 'w') as fd:
        for line in lines:
            if line.startswith('Subject:'):
                line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count,
                                                     text[0])

            # Insert our cover letter
            elif line.startswith('*** BLURB HERE ***'):
                # First the blurb text
                line = '\n'.join(text[1:]) + '\n'
                if series.get('notes'):
                    line += '\n'.join(series.notes) + '\n'

                # Now the change list
                out = series.MakeChangeLog(None)
                line += '\n' + '\n'.join(out)
            fd.write(line)
459