xref: /openbmc/u-boot/tools/patman/patchstream.py (revision 63495ad7)
1# Copyright (c) 2011 The Chromium OS Authors.
2#
3# See file CREDITS for list of people who contributed to this
4# project.
5#
6# This program is free software; you can redistribute it and/or
7# modify it under the terms of the GNU General Public License as
8# published by the Free Software Foundation; either version 2 of
9# the License, or (at your option) any later version.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, write to the Free Software
18# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
19# MA 02111-1307 USA
20#
21
22import os
23import re
24import shutil
25import tempfile
26
27import command
28import commit
29import gitutil
30from series import Series
31
# All patterns below are applied with .match(), so they are anchored at the
# start of the line whether or not they begin with '^'. Raw strings are used
# so that regex escapes such as \w are not treated as (invalid) Python string
# escapes.

# Tags that we detect and remove
re_remove = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
    r'|Reviewed-on:|Commit-\w*:')

# Lines which are allowed after a TEST= line
re_allowed_after_test = re.compile(r'^Signed-off-by:')

# Signoffs
re_signoff = re.compile(r'^Signed-off-by:')

# The start of the cover letter
re_cover = re.compile(r'^Cover-letter:')

# A cover letter Cc; group 1 is the value after the tag
re_cover_cc = re.compile(r'^Cover-letter-cc: *(.*)')

# Patch series tag; group 1 is the tag name, group 2 is its value
re_series = re.compile(r'^Series-([a-z-]*): *(.*)')

# Commit tags that we want to collect and keep; group 1 is the tag name,
# group 2 is its value
re_tag = re.compile(r'^(Tested-by|Acked-by|Reviewed-by|Cc): (.*)')

# The start of a new commit in the git log; group 1 is the commit hash
re_commit = re.compile(r'^commit ([0-9a-f]*)$')

# We detect these since checkpatch doesn't always do it: an added line
# (leading '+') containing a space immediately followed by a tab
re_space_before_tab = re.compile(r'^[+].* \t')

# States we can be in - can we use range() and still have comments?
STATE_MSG_HEADER = 0        # Still in the message header
STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
STATE_PATCH_HEADER = 2      # In patch header (after the subject)
STATE_DIFFS = 3             # In the diff part (past --- line)
65
class PatchStream:
    """Class for detecting/injecting tags in a patch or series of patches

    We support processing the output of 'git log' to read out the tags we
    are interested in. We can also process a patch file in order to remove
    unwanted tags or inject additional ones. These correspond to the two
    phases of processing.
    """
    def __init__(self, series, name=None, is_log=False):
        """Set up a new patch stream.

        Args:
            series: Series object which collects (log mode) or supplies
                (patch mode) information about the patch series
            name: Not used by this class; accepted for compatibility with
                existing callers
            is_log: True if the input is 'git log' output (commit message
                lines indented by 4 spaces), False if it is a patch file
        """
        self.skip_blank = False          # True to skip a single blank line
        self.found_test = False          # Found a TEST= line
        self.lines_after_test = 0        # Number of lines found after TEST=
        self.warn = []                   # List of warnings we have collected
        self.linenum = 1                 # Output line number we are up to
        self.in_section = None           # Name of start...END section we are in
        self.notes = []                  # Series notes
        self.section = []                # The current section...END section
        self.series = series             # Info about the patch series
        self.is_log = is_log             # True if indent like git log
        self.in_change = 0               # Non-zero if we are in a change list
        self.blank_count = 0             # Number of blank lines stored up
        self.state = STATE_MSG_HEADER    # What state are we in?
        self.tags = []                   # Tags collected, like Tested-by...
        self.signoff = []                # Contents of signoff line
        self.commit = None               # Current commit

    def AddToSeries(self, line, name, value):
        """Add a new Series-xxx tag.

        When a Series-xxx tag is detected, we come here to record it, if we
        are scanning a 'git log'.

        A 'notes' tag additionally opens a section which collects the
        following lines until an 'END' line is seen.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Series-')
            value: Tag value (part after 'Series-xxx: ')
        """
        if name == 'notes':
            self.in_section = name
            self.skip_blank = False
        if self.is_log:
            self.series.AddTag(self.commit, line, name, value)

    def CloseCommit(self):
        """Save the current commit into our commit list, and reset our state

        This only does anything in 'git log' mode and when a commit is
        currently open.
        """
        if self.commit and self.is_log:
            self.series.AddCommit(self.commit)
            self.commit = None

    def FormatTags(self, tags):
        """Convert the collected tags into a list of output lines.

        Tags are processed in sorted order. A 'Cc:' tag has its value split
        on commas and passed through gitutil.BuildEmailList(); any other
        tag is emitted unchanged.

        Args:
            tags: List of tag lines, e.g. 'Acked-by: Fred <fred@example.com>'

        Returns:
            List of lines to output
        """
        out_list = []
        for tag in sorted(tags):
            if tag.startswith('Cc:'):
                # Skip over the 'Cc: ' prefix to get at the address list
                tag_list = tag[4:].split(',')
                out_list += gitutil.BuildEmailList(tag_list, 'Cc:')
            else:
                out_list.append(tag)
        return out_list

    def ProcessLine(self, line):
        """Process a single line of a patch file or commit log

        This process a line and returns a list of lines to output. The list
        may be empty or may contain multiple output lines.

        This is where all the complicated logic is located. The class's
        state is used to move between different states and detect things
        properly.

        We can be in one of two modes:
            self.is_log == True: This is 'git log' mode, where most output is
                indented by 4 characters and we are scanning for tags

            self.is_log == False: This is 'patch' mode, where we already have
                all the tags, and are processing patches to remove junk we
                don't want, and add things we think are required.

        Args:
            line: text line to process

        Returns:
            list of output lines, or [] if nothing should be output

        Raises:
            ValueError: if a 'Series-changes:' tag does not carry an integer
                version number
        """
        # Initially we have no output. Prepare the input line string
        out = []
        line = line.rstrip('\n')
        if self.is_log:
            if line[:4] == '    ':
                line = line[4:]

        # Handle state transition and skipping blank lines
        series_match = re_series.match(line)
        commit_match = re_commit.match(line) if self.is_log else None
        cover_cc_match = re_cover_cc.match(line)
        tag_match = None
        if self.state == STATE_PATCH_HEADER:
            tag_match = re_tag.match(line)
        is_blank = not line.strip()
        if is_blank:
            if (self.state == STATE_MSG_HEADER
                    or self.state == STATE_PATCH_SUBJECT):
                self.state += 1

            # We don't have a subject in the text stream of patch files
            # It has its own line with a Subject: tag
            if not self.is_log and self.state == STATE_PATCH_SUBJECT:
                self.state += 1
        elif commit_match:
            self.state = STATE_MSG_HEADER

        # If we are in a section, keep collecting lines until we see END
        if self.in_section:
            if line == 'END':
                if self.in_section == 'cover':
                    self.series.cover = self.section
                elif self.in_section == 'notes':
                    if self.is_log:
                        self.series.notes += self.section
                else:
                    self.warn.append("Unknown section '%s'" % self.in_section)
                self.in_section = None
                self.skip_blank = True
                self.section = []
            else:
                self.section.append(line)

        # Detect the commit subject
        elif not is_blank and self.state == STATE_PATCH_SUBJECT:
            self.commit.subject = line

        # Detect the tags we want to remove, and skip blank lines
        elif re_remove.match(line):
            self.skip_blank = True

            # TEST= should be the last thing in the commit, so remove
            # everything after it
            if line.startswith('TEST='):
                self.found_test = True
        elif self.skip_blank and is_blank:
            self.skip_blank = False

        # Detect the start of a cover letter section
        elif re_cover.match(line):
            self.in_section = 'cover'
            self.skip_blank = False

        elif cover_cc_match:
            value = cover_cc_match.group(1)
            self.AddToSeries(line, 'cover-cc', value)

        # If we are in a change list, keep collecting lines until a blank one
        elif self.in_change:
            if is_blank:
                # Blank line ends this change list
                self.in_change = 0
            elif line == '---' or re_signoff.match(line):
                # These also end the change list, but must still be
                # processed as normal lines, so run them through again
                self.in_change = 0
                out = self.ProcessLine(line)
            else:
                if self.is_log:
                    self.series.AddChange(self.in_change, self.commit, line)
            self.skip_blank = False

        # Detect Series-xxx tags
        elif series_match:
            name = series_match.group(1)
            value = series_match.group(2)
            if name == 'changes':
                # value is the version number: e.g. 1, or 2
                try:
                    value = int(value)
                except ValueError:
                    raise ValueError("%s: Cannot decode version info '%s'" %
                        (self.commit.hash, line))
                self.in_change = value
            else:
                self.AddToSeries(line, name, value)
                self.skip_blank = True

        # Detect the start of a new commit
        elif commit_match:
            self.CloseCommit()
            # TODO: We should store the whole hash, and just display a subset
            self.commit = commit.Commit(commit_match.group(1)[:8])

        # Detect tags in the commit message
        elif tag_match:
            # Remove Tested-by self, since few will take much notice. If
            # USER is unset or empty we cannot tell who 'self' is, so the
            # tag is kept rather than failing or matching every address.
            user = os.getenv('USER')
            if (tag_match.group(1) == 'Tested-by' and user and
                    tag_match.group(2).find(user + '@') != -1):
                self.warn.append("Ignoring %s" % line)
            elif tag_match.group(1) == 'Cc':
                self.commit.AddCc(tag_match.group(2).split(','))
            else:
                self.tags.append(line)

        # Well that means this is an ordinary line
        else:
            # Look for ugly non-ASCII characters (anything from 0x80 up);
            # pos is the 1-based column of the character
            for pos, ch in enumerate(line, 1):
                # TODO: Would be nicer to report source filename and line
                if ord(ch) >= 0x80:
                    self.warn.append("Line %d/%d ('%s') has funny ascii char" %
                        (self.linenum, pos, line))

            # Look for space before tab
            # NOTE(review): the pattern is anchored at the start of the
            # line, so m.start() is always 0 here - the column reported is
            # not the position of the offending space.
            m = re_space_before_tab.match(line)
            if m:
                self.warn.append('Line %d/%d has space before tab' %
                    (self.linenum, m.start()))

            # OK, we have a valid non-blank line
            out = [line]
            self.linenum += 1
            self.skip_blank = False
            if self.state == STATE_DIFFS:
                pass

            # If this is the start of the diffs section, emit our tags and
            # change log
            elif line == '---':
                self.state = STATE_DIFFS

                # Output the tags (signoff first), then change list
                out = []
                log = self.series.MakeChangeLog(self.commit)
                out += self.FormatTags(self.tags)
                out += [line] + log
            elif self.found_test:
                if not re_allowed_after_test.match(line):
                    self.lines_after_test += 1

        return out

    def Finalize(self):
        """Close out processing of this patch stream

        Saves any open commit and adds a warning if lines were found after
        a TEST= line.
        """
        self.CloseCommit()
        if self.lines_after_test:
            self.warn.append('Found %d lines after TEST=' %
                    self.lines_after_test)

    def ProcessStream(self, infd, outfd):
        """Copy a stream from infd to outfd, filtering out unwanted things.

        This is used to process patch files one at a time.

        Added blank lines ('+' on its own) are held back until the next
        non-blank output line is seen, so that a warning can be given if
        they turn out to be at the end of a file in the patch.

        Args:
            infd: Input stream file object
            outfd: Output stream file object
        """
        # Extract the filename from each diff, for nice warnings
        fname = None
        last_fname = None
        re_fname = re.compile('diff --git a/(.*) b/.*')
        for line in infd:
            out = self.ProcessLine(line)

            # Try to detect blank lines at EOF
            for out_line in out:
                match = re_fname.match(out_line)
                if match:
                    last_fname = fname
                    fname = match.group(1)
                if out_line == '+':
                    self.blank_count += 1
                else:
                    if self.blank_count and (out_line == '-- ' or match):
                        # When a new diff starts, the stored blank lines
                        # belong to the previous file. At the '-- '
                        # signature line they belong to the file we are
                        # still in.
                        bad_fname = last_fname if match else fname
                        self.warn.append("Found possible blank line(s) at "
                                "end of file '%s'" % bad_fname)
                    outfd.write('+\n' * self.blank_count)
                    outfd.write(out_line + '\n')
                    self.blank_count = 0
        self.Finalize()
344
345
def GetMetaDataForList(commit_range, git_dir=None, count=None,
                       series=None):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and pulls out the tags we
    are interested in.

    Args:
        commit_range: Range of commits to count (e.g. 'HEAD..base')
        git_dir: Path to git repository (None to use default)
        count: Number of commits to list, or None for no limit
        series: Series object to add information into. By default a new series
            is started.
    Returns:
        A Series object containing information about the commits.
    """
    # Create the default here rather than in the signature: a default of
    # Series() would be built once at definition time and then shared (and
    # added to) by every call which does not pass a series.
    if series is None:
        series = Series()
    params = ['git', 'log', '--no-color', '--reverse', '--no-decorate',
                    commit_range]
    if count is not None:
        # Goes just after 'log'
        params[2:2] = ['-n%d' % count]
    if git_dir:
        # Goes before the 'log' sub-command
        params[1:1] = ['--git-dir', git_dir]
    pipe = [params]
    stdout = command.RunPipe(pipe, capture=True).stdout
    ps = PatchStream(series, is_log=True)
    for line in stdout.splitlines():
        ps.ProcessLine(line)
    ps.Finalize()
    return series
375
def GetMetaData(start, count):
    """Collect patch series metadata for commits counted back from HEAD

    Builds a 'HEAD~<start>' revision and passes it to GetMetaDataForList(),
    using the default git directory.

    Args:
        start: Commit to start from: 0=HEAD, 1=next one, etc.
        count: Number of commits to list
    Returns:
        Whatever GetMetaDataForList() returns for that revision
    """
    commit_range = 'HEAD~%d' % start
    return GetMetaDataForList(commit_range, git_dir=None, count=count)
387
def FixPatch(backup_dir, fname, series, commit):
    """Fix up a patch file, by adding/removing as required.

    We remove our tags from the patch file, insert changes lists, etc.
    The patch file is processed in place, and overwritten.

    A backup file is put into backup_dir (if not None).

    Args:
        backup_dir: Directory to copy the original patch file into before
            it is overwritten, or None for no backup
        fname: Filename to patch file to process
        series: Series information about this patch set
        commit: Commit object for this patch file
    Return:
        A list of errors, or [] if all ok.
    """
    handle, tmpname = tempfile.mkstemp()
    try:
        # The 'with' blocks make sure both files are closed even if
        # processing fails part-way through
        with os.fdopen(handle, 'w') as outfd:
            with open(fname, 'r') as infd:
                ps = PatchStream(series)
                ps.commit = commit
                ps.ProcessStream(infd, outfd)

        # Create a backup file if required
        if backup_dir:
            shutil.copy(fname,
                        os.path.join(backup_dir, os.path.basename(fname)))
        shutil.move(tmpname, fname)
    finally:
        # After a successful move the temporary file is gone; if anything
        # failed, don't leave it lying around
        if os.path.exists(tmpname):
            os.remove(tmpname)
    return ps.warn
417
def FixPatches(series, fnames):
    """Fix up a list of patches identified by filenames

    The patch files are processed in place, and overwritten. Each file is
    paired with the commit at the same position in series.commits, and any
    warnings found are printed.

    Args:
        series: The series object
        fnames: List of patch files to process
    Returns:
        The series object passed in
    """
    # Current workflow creates patches, so we shouldn't need a backup
    backup_dir = None  #tempfile.mkdtemp('clean-patch')
    count = 0
    for fname in fnames:
        commit = series.commits[count]
        commit.patch = fname
        result = FixPatch(backup_dir, fname, series, commit)
        if result:
            # Each print is given a single string so that the output is the
            # same whether print is a statement or a function
            print('%d warnings for %s:' % (len(result), fname))
            for warn in result:
                print('\t' + str(warn))
            print('')
        count += 1
    print('Cleaned %d patches' % count)
    return series
442
def InsertCoverLetter(fname, series, count):
    """Inserts a cover letter with the required info into patch 0

    The 'Subject:' line is replaced with one built from the patch prefix and
    the first line of the cover letter. The '*** BLURB HERE ***' line is
    replaced with the rest of the cover letter, then the series notes (if
    any), then the change list. All other lines are kept as they are.

    Args:
        fname: Input / output filename of the cover letter file
        series: Series object
        count: Number of patches in the series
    """
    with open(fname, 'r') as fd:
        lines = fd.readlines()

    text = series.cover
    prefix = series.GetPatchPrefix()

    # The cover letter is patch number zero, padded to the same width as the
    # patch count (0/9, 00/12, 000/100)
    zero = '0' * len(str(count))

    # Build all of the output before opening the file for writing, so that a
    # failure here cannot leave the file truncated
    out_lines = []
    for line in lines:
        if line.startswith('Subject:'):
            line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0])

        # Insert our cover letter
        elif line.startswith('*** BLURB HERE ***'):
            # First the blurb text
            line = '\n'.join(text[1:]) + '\n'
            if series.get('notes'):
                line += '\n'.join(series.notes) + '\n'

            # Now the change list
            out = series.MakeChangeLog(None)
            line += '\n' + '\n'.join(out)
        out_lines.append(line)

    with open(fname, 'w') as fd:
        fd.write(''.join(out_lines))
475