xref: /openbmc/u-boot/tools/patman/patchstream.py (revision 75504e95)
1# Copyright (c) 2011 The Chromium OS Authors.
2#
3# SPDX-License-Identifier:	GPL-2.0+
4#
5
6import os
7import re
8import shutil
9import tempfile
10
11import command
12import commit
13import gitutil
14from series import Series
15
# All patterns below are applied with .match(), so they are anchored at the
# start of the line whether or not they carry an explicit '^'. Raw strings
# are used so that regex escapes such as \w and \t reach the re module
# untouched instead of being treated as string escapes.

# Tags that we detect and remove
re_remove = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
    r'|Reviewed-on:|Commit-\w*:')

# Lines which are allowed after a TEST= line
re_allowed_after_test = re.compile(r'^Signed-off-by:')

# Signoffs
re_signoff = re.compile(r'^Signed-off-by:')

# The start of the cover letter
re_cover = re.compile(r'^Cover-letter:')

# A cover letter Cc: group 1 is the address list
re_cover_cc = re.compile(r'^Cover-letter-cc: *(.*)')

# Patch series tag: group 1 is the tag name, group 2 its value
re_series_tag = re.compile(r'^Series-([a-z-]*): *(.*)')

# Per-commit tag: group 1 is the tag name, group 2 its value
re_commit_tag = re.compile(r'^Commit-([a-z-]*): *(.*)')

# Commit tags that we want to collect and keep
re_tag = re.compile(r'^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)')

# The start of a new commit in the git log: group 1 is the hash
re_commit = re.compile(r'^commit ([0-9a-f]*)$')

# We detect these since checkpatch doesn't always do it
re_space_before_tab = re.compile(r'^[+].* \t')

# States we can be in - can we use range() and still have comments?
# The values are consecutive because the parser moves to the next state
# with 'state += 1'.
STATE_MSG_HEADER = 0        # Still in the message header
STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
STATE_PATCH_HEADER = 2      # In patch header (after the subject)
STATE_DIFFS = 3             # In the diff part (past --- line)
52
class PatchStream:
    """Class for detecting/injecting tags in a patch or series of patches

    We support processing the output of 'git log' to read out the tags we
    are interested in. We can also process a patch file in order to remove
    unwanted tags or inject additional ones. These correspond to the two
    phases of processing.
    """
    def __init__(self, series, name=None, is_log=False):
        """Set up a new patch stream.

        Args:
            series: Series object which receives the tags, commits and
                changes found while scanning
            name: Not used by this class; kept so existing callers that
                pass it continue to work
            is_log: True if the input is 'git log' output (indented commit
                messages), False if it is a patch file
        """
        self.skip_blank = False          # True to skip a single blank line
        self.found_test = False          # Found a TEST= line
        self.lines_after_test = 0        # Number of lines found after TEST=
        self.warn = []                   # List of warnings we have collected
        self.linenum = 1                 # Output line number we are up to
        self.in_section = None           # Name of start...END section we are in
        self.notes = []                  # Series notes
        self.section = []                # The current section...END section
        self.series = series             # Info about the patch series
        self.is_log = is_log             # True if indent like git log
        self.in_change = 0               # Non-zero if we are in a change list
        self.blank_count = 0             # Number of blank lines stored up
        self.state = STATE_MSG_HEADER    # What state are we in?
        self.tags = []                   # Tags collected, like Tested-by...
        self.signoff = []                # Contents of signoff line
        self.commit = None               # Current commit

    def AddToSeries(self, line, name, value):
        """Add a new Series-xxx tag.

        When a Series-xxx tag is detected, we come here to record it, if we
        are scanning a 'git log'.

        A 'notes' tag additionally opens a section, so that the following
        lines are collected until an END line is seen.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Series-')
            value: Tag value (part after 'Series-xxx: ')
        """
        if name == 'notes':
            self.in_section = name
            self.skip_blank = False
        if self.is_log:
            self.series.AddTag(self.commit, line, name, value)

    def AddToCommit(self, line, name, value):
        """Add a new Commit-xxx tag.

        When a Commit-xxx tag is detected, we come here to record it. Only
        'notes' has any effect: it opens a 'commit-notes' section so that
        the following lines are collected until an END line is seen.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Commit-')
            value: Tag value (part after 'Commit-xxx: ')
        """
        if name == 'notes':
            self.in_section = 'commit-' + name
            self.skip_blank = False

    def CloseCommit(self):
        """Save the current commit into our commit list, and reset our state"""
        if self.commit and self.is_log:
            self.series.AddCommit(self.commit)
            self.commit = None

    def FormatTags(self, tags):
        """Sort a list of tag lines and expand any Cc: lines.

        Args:
            tags: List of tag lines (e.g. 'Tested-by: ...', 'Cc: a, b')

        Returns:
            New list holding the tags in sorted order. Each 'Cc:' line is
            split at commas and replaced by whatever
            gitutil.BuildEmailList() returns for those entries.
        """
        out_list = []
        for tag in sorted(tags):
            if tag.startswith('Cc:'):
                # Skip 'Cc:' plus the following character
                tag_list = tag[4:].split(',')
                out_list += gitutil.BuildEmailList(tag_list, 'Cc:')
            else:
                out_list.append(tag)
        return out_list

    def ProcessLine(self, line):
        """Process a single line of a patch file or commit log

        This process a line and returns a list of lines to output. The list
        may be empty or may contain multiple output lines.

        This is where all the complicated logic is located. The class's
        state is used to move between different states and detect things
        properly.

        We can be in one of two modes:
            self.is_log == True: This is 'git log' mode, where most output is
                indented by 4 characters and we are scanning for tags

            self.is_log == False: This is 'patch' mode, where we already have
                all the tags, and are processing patches to remove junk we
                don't want, and add things we think are required.

        Args:
            line: text line to process

        Returns:
            list of output lines, or [] if nothing should be output

        Raises:
            ValueError: if a Series-changes tag does not hold an integer
        """
        # Initially we have no output. Prepare the input line string
        out = []
        line = line.rstrip('\n')
        if self.is_log:
            if line[:4] == '    ':
                line = line[4:]

        # Handle state transition and skipping blank lines
        series_tag_match = re_series_tag.match(line)
        commit_tag_match = re_commit_tag.match(line)
        commit_match = re_commit.match(line) if self.is_log else None
        cover_cc_match = re_cover_cc.match(line)
        tag_match = None
        if self.state == STATE_PATCH_HEADER:
            tag_match = re_tag.match(line)
        is_blank = not line.strip()
        if is_blank:
            if (self.state == STATE_MSG_HEADER
                    or self.state == STATE_PATCH_SUBJECT):
                self.state += 1

            # We don't have a subject in the text stream of patch files
            # It has its own line with a Subject: tag
            if not self.is_log and self.state == STATE_PATCH_SUBJECT:
                self.state += 1
        elif commit_match:
            self.state = STATE_MSG_HEADER

        # If we are in a section, keep collecting lines until we see END
        if self.in_section:
            if line == 'END':
                if self.in_section == 'cover':
                    self.series.cover = self.section
                elif self.in_section == 'notes':
                    if self.is_log:
                        self.series.notes += self.section
                elif self.in_section == 'commit-notes':
                    if self.is_log:
                        self.commit.notes += self.section
                else:
                    self.warn.append("Unknown section '%s'" % self.in_section)
                self.in_section = None
                self.skip_blank = True
                self.section = []
            else:
                self.section.append(line)

        # Detect the commit subject
        elif not is_blank and self.state == STATE_PATCH_SUBJECT:
            self.commit.subject = line

        # Detect the tags we want to remove, and skip blank lines
        elif re_remove.match(line) and not commit_tag_match:
            self.skip_blank = True

            # TEST= should be the last thing in the commit, so remove
            # everything after it
            if line.startswith('TEST='):
                self.found_test = True
        elif self.skip_blank and is_blank:
            self.skip_blank = False

        # Detect the start of a cover letter section
        elif re_cover.match(line):
            self.in_section = 'cover'
            self.skip_blank = False

        elif cover_cc_match:
            value = cover_cc_match.group(1)
            self.AddToSeries(line, 'cover-cc', value)

        # If we are in a change list, keep collecting lines until a blank one
        elif self.in_change:
            if is_blank:
                # Blank line ends this change list
                self.in_change = 0
            elif line == '---' or re_signoff.match(line):
                # These also end the change list, but must still be handled
                # as normal lines, so run them through again
                self.in_change = 0
                out = self.ProcessLine(line)
            else:
                if self.is_log:
                    self.series.AddChange(self.in_change, self.commit, line)
            self.skip_blank = False

        # Detect Series-xxx tags
        elif series_tag_match:
            name = series_tag_match.group(1)
            value = series_tag_match.group(2)
            if name == 'changes':
                # value is the version number: e.g. 1, or 2
                try:
                    value = int(value)
                except ValueError:
                    raise ValueError("%s: Cannot decode version info '%s'" %
                        (self.commit.hash, line))
                self.in_change = value
            else:
                self.AddToSeries(line, name, value)
                self.skip_blank = True

        # Detect Commit-xxx tags
        elif commit_tag_match:
            name = commit_tag_match.group(1)
            value = commit_tag_match.group(2)
            if name == 'notes':
                self.AddToCommit(line, name, value)
                self.skip_blank = True

        # Detect the start of a new commit
        elif commit_match:
            self.CloseCommit()
            # TODO: We should store the whole hash, and just display a subset
            self.commit = commit.Commit(commit_match.group(1)[:8])

        # Detect tags in the commit message
        elif tag_match:
            tag_name = tag_match.group(1)
            tag_value = tag_match.group(2)

            # USER may be unset in the environment. In that case we cannot
            # tell whether the tag refers to the current user, so keep it.
            user = os.getenv('USER') if tag_name == 'Tested-by' else None

            # Remove Tested-by self, since few will take much notice
            if user and (user + '@') in tag_value:
                self.warn.append("Ignoring %s" % line)
            elif tag_name == 'Patch-cc':
                self.commit.AddCc(tag_value.split(','))
            else:
                self.tags.append(line)

        # Well that means this is an ordinary line
        else:
            # Look for non-ASCII characters, i.e. anything above 0x7f
            for pos, ch in enumerate(line, 1):
                # TODO: Would be nicer to report source filename and line
                if ord(ch) > 0x7f:
                    self.warn.append("Line %d/%d ('%s') has funny ascii char" %
                        (self.linenum, pos, line))

            # Look for space before tab. The regex is anchored at the start
            # of the line, so locate the offending space separately and
            # report its 1-based position, like the check above.
            if re_space_before_tab.match(line):
                self.warn.append('Line %d/%d has space before tab' %
                    (self.linenum, line.index(' \t') + 1))

            # OK, we have a valid non-blank line
            out = [line]
            self.linenum += 1
            self.skip_blank = False
            if self.state == STATE_DIFFS:
                pass

            # If this is the start of the diffs section, emit our tags and
            # change log
            elif line == '---':
                self.state = STATE_DIFFS

                # Output the tags (signoff first), then change list
                out = []
                log = self.series.MakeChangeLog(self.commit)
                out += self.FormatTags(self.tags)
                out += [line] + self.commit.notes + [''] + log
            elif self.found_test:
                if not re_allowed_after_test.match(line):
                    self.lines_after_test += 1

        return out

    def Finalize(self):
        """Close out processing of this patch stream"""
        self.CloseCommit()
        if self.lines_after_test:
            self.warn.append('Found %d lines after TEST=' %
                    self.lines_after_test)

    def ProcessStream(self, infd, outfd):
        """Copy a stream from infd to outfd, filtering out unwanted things.

        This is used to process patch files one at a time. Each input line
        is passed through ProcessLine() and the resulting lines are written
        to outfd. Added blank lines ('+') are held back until the next
        different line arrives, so that a warning can be recorded when they
        turn out to be at the end of a file.

        Args:
            infd: Input stream file object
            outfd: Output stream file object
        """
        # Extract the filename from each diff, for nice warnings
        fname = None
        last_fname = None
        re_fname = re.compile('diff --git a/(.*) b/.*')
        while True:
            line = infd.readline()
            if not line:
                break
            out = self.ProcessLine(line)

            # Try to detect blank lines at EOF
            for out_line in out:
                match = re_fname.match(out_line)
                if match:
                    last_fname = fname
                    fname = match.group(1)
                if out_line == '+':
                    self.blank_count += 1
                else:
                    if self.blank_count and (out_line == '-- ' or match):
                        # At a new diff header the blank lines belong to the
                        # previous file; at the '-- ' trailer they belong to
                        # the file we are still in.
                        blank_fname = last_fname if match else fname
                        self.warn.append("Found possible blank line(s) at "
                                "end of file '%s'" % blank_fname)
                    outfd.write('+\n' * self.blank_count)
                    outfd.write(out_line + '\n')
                    self.blank_count = 0

        # Do not lose held-back '+' lines if the stream ends without a
        # trailer line following them
        if self.blank_count:
            outfd.write('+\n' * self.blank_count)
            self.blank_count = 0
        self.Finalize()
357
358
def GetMetaDataForList(commit_range, git_dir=None, count=None,
                       series=None):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and pulls out the tags we
    are interested in.

    Args:
        commit_range: Range of commits to count (e.g. 'HEAD..base')
        git_dir: Path to git repository (None to use default)
        count: Number of commits to list, or None for no limit
        series: Series object to add information into. If None, a new
            Series is created for this call.
    Returns:
        A Series object containing information about the commits.
    """
    # A default of Series() in the signature would be created once, when
    # the function is defined, and then shared (and added to) by every call
    # which omits the argument. Create a fresh one per call instead.
    if series is None:
        series = Series()
    params = ['git', 'log', '--no-color', '--reverse', '--no-decorate',
                    commit_range]
    if count is not None:
        # Goes straight after 'log'
        params[2:2] = ['-n%d' % count]
    if git_dir:
        # Goes straight after 'git', before the 'log' sub-command
        params[1:1] = ['--git-dir', git_dir]
    pipe = [params]
    stdout = command.RunPipe(pipe, capture=True).stdout
    ps = PatchStream(series, is_log=True)
    for line in stdout.splitlines():
        ps.ProcessLine(line)
    ps.Finalize()
    return series
388
def GetMetaData(start, count):
    """Collect patch series metadata for commits counted back from HEAD

    This is a convenience wrapper around GetMetaDataForList() which uses
    the default git directory and builds the commit reference from an
    offset relative to HEAD.

    Args:
        start: Commit to start from: 0=HEAD, 1=next one, etc.
        count: Number of commits to list

    Returns:
        Whatever GetMetaDataForList() returns for 'HEAD~<start>'
    """
    commit_range = 'HEAD~%d' % start
    return GetMetaDataForList(commit_range, git_dir=None, count=count)
400
def FixPatch(backup_dir, fname, series, commit):
    """Fix up a patch file, by adding/removing as required.

    We remove our tags from the patch file, insert changes lists, etc.
    The patch file is processed in place, and overwritten.

    A backup file is put into backup_dir (if not None).

    The filtered output is first written to a temporary file, which then
    replaces fname. If anything fails before that point, the temporary file
    is removed and fname is left as it was.

    Args:
        backup_dir: Directory to copy the original patch file into before
            it is overwritten, or None for no backup
        fname: Filename to patch file to process
        series: Series information about this patch set
        commit: Commit object for this patch file
    Return:
        A list of warnings collected while processing, or [] if all ok.
    """
    handle, tmpname = tempfile.mkstemp()
    replaced = False
    try:
        # Use 'with' so both files are closed even if processing raises
        with os.fdopen(handle, 'w') as outfd:
            with open(fname, 'r') as infd:
                ps = PatchStream(series)
                ps.commit = commit
                ps.ProcessStream(infd, outfd)

        # Create a backup file if required
        if backup_dir:
            shutil.copy(fname,
                        os.path.join(backup_dir, os.path.basename(fname)))
        shutil.move(tmpname, fname)
        replaced = True
    finally:
        # Don't leave a stray temporary file behind on failure
        if not replaced and os.path.exists(tmpname):
            os.remove(tmpname)
    return ps.warn
430
def FixPatches(series, fnames):
    """Fix up a list of patches identified by filenames

    The patch files are processed in place, and overwritten. The n'th file
    is matched with the n'th entry of series.commits, and that commit's
    'patch' attribute is set to the filename. Any warnings produced for a
    file are printed, followed by a summary line.

    Args:
        series: The series object
        fnames: List of patch files to process

    Returns:
        The series object that was passed in
    """
    # Current workflow creates patches, so we shouldn't need a backup
    backup_dir = None  #tempfile.mkdtemp('clean-patch')
    count = 0
    for index, fname in enumerate(fnames):
        commit = series.commits[index]
        commit.patch = fname
        result = FixPatch(backup_dir, fname, series, commit)
        if result:
            # Each print() below takes exactly one argument, so it produces
            # the same output as a Python 2 print statement and as the
            # Python 3 print function.
            print('%d warnings for %s:' % (len(result), fname))
            for warn in result:
                print('\t%s' % warn)
            print('')
        count = index + 1
    print('Cleaned %d patches' % count)
    return series
455
def InsertCoverLetter(fname, series, count):
    """Inserts a cover letter with the required info into patch 0

    The 'Subject:' line is replaced by one built from the series prefix,
    the patch count and the first line of the cover letter. The
    '*** BLURB HERE ***' line is replaced by the rest of the cover letter,
    the series notes (if any) and the change log.

    The new contents are built completely before the file is reopened for
    writing, so a failure while reading the series information leaves the
    file untouched.

    Args:
        fname: Input / output filename of the cover letter file
        series: Series object
        count: Number of patches in the series
    """
    with open(fname, 'r') as fd:
        lines = fd.readlines()

    text = series.cover
    prefix = series.GetPatchPrefix()

    # With 10 or more patches the cover letter number should be 00/xx, with
    # 100 or more 000/xxx, etc: use as many zeros as count has digits
    zero = '0' * len(str(count))

    out_lines = []
    for line in lines:
        if line.startswith('Subject:'):
            line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0])

        # Insert our cover letter
        elif line.startswith('*** BLURB HERE ***'):
            # First the blurb text
            line = '\n'.join(text[1:]) + '\n'
            if series.get('notes'):
                line += '\n'.join(series.notes) + '\n'

            # Now the change list
            out = series.MakeChangeLog(None)
            line += '\n' + '\n'.join(out)
        out_lines.append(line)

    with open(fname, 'w') as fd:
        fd.writelines(out_lines)
488