xref: /openbmc/u-boot/tools/patman/patchstream.py (revision 2bb1cd53)
1# Copyright (c) 2011 The Chromium OS Authors.
2#
3# SPDX-License-Identifier:	GPL-2.0+
4#
5
6import os
7import re
8import shutil
9import tempfile
10
11import command
12import commit
13import gitutil
14from series import Series
15
# Tags that we detect and remove
# (raw strings so that regex escapes such as \w are not treated as string
# escapes by Python)
re_remove = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
    r'|Reviewed-on:|Commit-\w*:')

# Lines which are allowed after a TEST= line
re_allowed_after_test = re.compile(r'^Signed-off-by:')

# Signoffs
re_signoff = re.compile(r'^Signed-off-by: *(.*)')

# The start of the cover letter
re_cover = re.compile(r'^Cover-letter:')

# A cover letter Cc
re_cover_cc = re.compile(r'^Cover-letter-cc: *(.*)')

# Patch series tag
re_series_tag = re.compile(r'^Series-([a-z-]*): *(.*)')

# Commit series tag
re_commit_tag = re.compile(r'^Commit-([a-z-]*): *(.*)')

# Commit tags that we want to collect and keep
re_tag = re.compile(r'^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)')

# The start of a new commit in the git log
re_commit = re.compile(r'^commit ([0-9a-f]*)$')

# We detect these since checkpatch doesn't always do it
re_space_before_tab = re.compile(r'^[+].* \t')

# States we can be in - can we use range() and still have comments?
STATE_MSG_HEADER = 0        # Still in the message header
STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
STATE_PATCH_HEADER = 2      # In patch header (after the subject)
STATE_DIFFS = 3             # In the diff part (past --- line)
52
class PatchStream:
    """Class for detecting/injecting tags in a patch or series of patches

    We support processing the output of 'git log' to read out the tags we
    are interested in. We can also process a patch file in order to remove
    unwanted tags or inject additional ones. These correspond to the two
    phases of processing.
    """
    def __init__(self, series, name=None, is_log=False):
        """Set up a new patch stream

        Args:
            series: Series object which tags, notes, changes and commits are
                recorded into
            name: Not used by this class; kept so existing callers still work
            is_log: True if we are processing 'git log' output (indented by
                4 characters), False if we are processing a patch file
        """
        self.skip_blank = False          # True to skip a single blank line
        self.found_test = False          # Found a TEST= line
        self.lines_after_test = 0        # Number of lines found after TEST=
        self.warn = []                   # List of warnings we have collected
        self.linenum = 1                 # Output line number we are up to
        self.in_section = None           # Name of start...END section we are in
        self.notes = []                  # Series notes
        self.section = []                # The current section...END section
        self.series = series             # Info about the patch series
        self.is_log = is_log             # True if indent like git log
        self.in_change = 0               # Non-zero if we are in a change list
        self.blank_count = 0             # Number of blank lines stored up
        self.state = STATE_MSG_HEADER    # What state are we in?
        self.signoff = []                # Contents of signoff line
        self.commit = None               # Current commit

    def AddToSeries(self, line, name, value):
        """Add a new Series-xxx tag.

        When a Series-xxx tag is detected, we come here to record it, if we
        are scanning a 'git log'.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Series-')
            value: Tag value (part after 'Series-xxx: ')
        """
        if name == 'notes':
            # Start collecting a notes section; it runs until an END line
            self.in_section = name
            self.skip_blank = False
        if self.is_log:
            self.series.AddTag(self.commit, line, name, value)

    def AddToCommit(self, line, name, value):
        """Add a new Commit-xxx tag.

        When a Commit-xxx tag is detected, we come here to record it.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Commit-')
            value: Tag value (part after 'Commit-xxx: ')
        """
        if name == 'notes':
            # Start collecting a commit-notes section; it runs until END
            self.in_section = 'commit-' + name
            self.skip_blank = False

    def CloseCommit(self):
        """Save the current commit into our commit list, and reset our state"""
        if self.commit and self.is_log:
            self.series.AddCommit(self.commit)
            self.commit = None

    def ProcessLine(self, line):
        """Process a single line of a patch file or commit log

        This process a line and returns a list of lines to output. The list
        may be empty or may contain multiple output lines.

        This is where all the complicated logic is located. The class's
        state is used to move between different states and detect things
        properly.

        We can be in one of two modes:
            self.is_log == True: This is 'git log' mode, where most output is
                indented by 4 characters and we are scanning for tags

            self.is_log == False: This is 'patch' mode, where we already have
                all the tags, and are processing patches to remove junk we
                don't want, and add things we think are required.

        Args:
            line: text line to process

        Returns:
            list of output lines, or [] if nothing should be output

        Raises:
            ValueError: if a 'Series-changes:' tag does not carry an integer
                version number
        """
        # Initially we have no output. Prepare the input line string
        out = []
        line = line.rstrip('\n')

        # The 'commit <hash>' line is matched before the log indent is
        # stripped, so indented message text cannot be taken for a commit
        commit_match = re_commit.match(line) if self.is_log else None

        if self.is_log:
            if line[:4] == '    ':
                line = line[4:]

        # Handle state transition and skipping blank lines
        series_tag_match = re_series_tag.match(line)
        commit_tag_match = re_commit_tag.match(line)
        cover_cc_match = re_cover_cc.match(line)
        signoff_match = re_signoff.match(line)
        tag_match = None
        if self.state == STATE_PATCH_HEADER:
            tag_match = re_tag.match(line)
        is_blank = not line.strip()
        if is_blank:
            if (self.state == STATE_MSG_HEADER
                    or self.state == STATE_PATCH_SUBJECT):
                self.state += 1

            # We don't have a subject in the text stream of patch files
            # It has its own line with a Subject: tag
            if not self.is_log and self.state == STATE_PATCH_SUBJECT:
                self.state += 1
        elif commit_match:
            self.state = STATE_MSG_HEADER

        # If we are in a section, keep collecting lines until we see END
        if self.in_section:
            if line == 'END':
                if self.in_section == 'cover':
                    self.series.cover = self.section
                elif self.in_section == 'notes':
                    if self.is_log:
                        self.series.notes += self.section
                elif self.in_section == 'commit-notes':
                    if self.is_log:
                        self.commit.notes += self.section
                else:
                    self.warn.append("Unknown section '%s'" % self.in_section)
                self.in_section = None
                self.skip_blank = True
                self.section = []
            else:
                self.section.append(line)

        # Detect the commit subject
        elif not is_blank and self.state == STATE_PATCH_SUBJECT:
            self.commit.subject = line

        # Detect the tags we want to remove, and skip blank lines
        elif re_remove.match(line) and not commit_tag_match:
            self.skip_blank = True

            # TEST= should be the last thing in the commit, so remove
            # everything after it
            if line.startswith('TEST='):
                self.found_test = True
        elif self.skip_blank and is_blank:
            self.skip_blank = False

        # Detect the start of a cover letter section
        elif re_cover.match(line):
            self.in_section = 'cover'
            self.skip_blank = False

        elif cover_cc_match:
            value = cover_cc_match.group(1)
            self.AddToSeries(line, 'cover-cc', value)

        # If we are in a change list, keep collecting lines until a blank one
        elif self.in_change:
            if is_blank:
                # Blank line ends this change list
                self.in_change = 0
            elif line == '---':
                # The diff separator also ends the change list; process it
                # again now that we are no longer in a change list
                self.in_change = 0
                out = self.ProcessLine(line)
            else:
                if self.is_log:
                    self.series.AddChange(self.in_change, self.commit, line)
            self.skip_blank = False

        # Detect Series-xxx tags
        elif series_tag_match:
            name = series_tag_match.group(1)
            value = series_tag_match.group(2)
            if name == 'changes':
                # value is the version number: e.g. 1, or 2
                try:
                    value = int(value)
                except ValueError:
                    raise ValueError("%s: Cannot decode version info '%s'" %
                        (self.commit.hash, line))
                self.in_change = int(value)
            else:
                self.AddToSeries(line, name, value)
                self.skip_blank = True

        # Detect Commit-xxx tags
        elif commit_tag_match:
            name = commit_tag_match.group(1)
            value = commit_tag_match.group(2)
            if name == 'notes':
                self.AddToCommit(line, name, value)
                self.skip_blank = True

        # Detect the start of a new commit
        elif commit_match:
            self.CloseCommit()
            self.commit = commit.Commit(commit_match.group(1))

        # Detect tags in the commit message
        elif tag_match:
            # Remove Tested-by self, since few will take much notice.
            # USER may be unset or empty (e.g. when run from a service); in
            # that case we cannot tell who 'self' is, so keep the tag
            user = os.getenv('USER')
            if (tag_match.group(1) == 'Tested-by' and user and
                    tag_match.group(2).find(user + '@') != -1):
                self.warn.append("Ignoring %s" % line)
            elif tag_match.group(1) == 'Patch-cc':
                self.commit.AddCc(tag_match.group(2).split(','))
            else:
                out = [line]

        # Suppress duplicate signoffs
        elif signoff_match:
            if (self.is_log or not self.commit or
                self.commit.CheckDuplicateSignoff(signoff_match.group(1))):
                out = [line]

        # Well that means this is an ordinary line
        else:
            # Look for ugly non-ASCII characters (anything above 0x7f)
            for pos, ch in enumerate(line, 1):
                # TODO: Would be nicer to report source filename and line
                if ord(ch) > 0x7f:
                    self.warn.append("Line %d/%d ('%s') has funny ascii char" %
                        (self.linenum, pos, line))

            # Look for space before tab
            # NOTE(review): the pattern is anchored at the start of the line,
            # so m.start() is always 0 and gives no column information; left
            # unchanged in case anything depends on the warning text
            m = re_space_before_tab.match(line)
            if m:
                self.warn.append('Line %d/%d has space before tab' %
                    (self.linenum, m.start()))

            # OK, we have a valid non-blank line
            out = [line]
            self.linenum += 1
            self.skip_blank = False
            if self.state == STATE_DIFFS:
                pass

            # If this is the start of the diffs section, emit our tags and
            # change log
            elif line == '---':
                self.state = STATE_DIFFS

                # Output the tags (signoff first), then change list
                out = []
                log = self.series.MakeChangeLog(self.commit)
                out += [line]
                if self.commit:
                    out += self.commit.notes
                out += [''] + log
            elif self.found_test:
                if not re_allowed_after_test.match(line):
                    self.lines_after_test += 1

        return out

    def Finalize(self):
        """Close out processing of this patch stream"""
        self.CloseCommit()
        if self.lines_after_test:
            self.warn.append('Found %d lines after TEST=' %
                    self.lines_after_test)

    def ProcessStream(self, infd, outfd):
        """Copy a stream from infd to outfd, filtering out unwanted things.

        This is used to process patch files one at a time.

        Args:
            infd: Input stream file object
            outfd: Output stream file object
        """
        # Extract the filename from each diff, for nice warnings
        fname = None
        last_fname = None
        re_fname = re.compile('diff --git a/(.*) b/.*')
        while True:
            line = infd.readline()
            if not line:
                break
            out = self.ProcessLine(line)

            # Try to detect blank lines at EOF
            for out_line in out:
                match = re_fname.match(out_line)
                if match:
                    last_fname = fname
                    fname = match.group(1)
                if out_line == '+':
                    # Hold back added blank lines until we know what follows
                    self.blank_count += 1
                else:
                    if self.blank_count and (out_line == '-- ' or match):
                        # A new diff header means the previous file has just
                        # ended; the '-- ' trailer means the current (last)
                        # file has just ended
                        ended_fname = last_fname if match else fname
                        self.warn.append("Found possible blank line(s) at "
                                "end of file '%s'" % ended_fname)
                    outfd.write('+\n' * self.blank_count)
                    outfd.write(out_line + '\n')
                    self.blank_count = 0
        self.Finalize()
356
357
def GetMetaDataForList(commit_range, git_dir=None, count=None,
                       series=None, allow_overwrite=False):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and pulls out the tags we
    are interested in.

    Args:
        commit_range: Range of commits to count (e.g. 'HEAD..base')
        git_dir: Path to git repository (None to use default)
        count: Number of commits to list, or None for no limit
        series: Series object to add information into. By default a new series
            is started.
        allow_overwrite: Allow tags to overwrite an existing tag
    Returns:
        A Series object containing information about the commits.
    """
    # Compare against None rather than testing truthiness: a Series supplied
    # by the caller must be used even if it is still empty (other code in
    # this file calls series.get(), so an empty Series may well be falsy)
    if series is None:
        series = Series()
    series.allow_overwrite = allow_overwrite
    params = gitutil.LogCmd(commit_range, reverse=True, count=count,
                            git_dir=git_dir)
    stdout = command.RunPipe([params], capture=True).stdout

    # Feed the log through a PatchStream in 'git log' mode; the output lines
    # are not needed here, only the information recorded into the series
    ps = PatchStream(series, is_log=True)
    for line in stdout.splitlines():
        ps.ProcessLine(line)
    ps.Finalize()
    return series
386
def GetMetaData(start, count):
    """Collect patch series metadata for commits counted back from HEAD

    Builds a 'HEAD~<start>' commit reference and hands it to
    GetMetaDataForList(), using the default git directory.

    Args:
        start: Commit to start from: 0=HEAD, 1=next one, etc.
        count: Number of commits to list

    Returns:
        Whatever GetMetaDataForList() returns for that reference
    """
    commit_range = 'HEAD~%d' % start
    return GetMetaDataForList(commit_range, git_dir=None, count=count)
398
def FixPatch(backup_dir, fname, series, commit):
    """Fix up a patch file, by adding/removing as required.

    We remove our tags from the patch file, insert changes lists, etc.
    The patch file is processed in place, and overwritten.

    A backup file is put into backup_dir (if not None).

    Args:
        backup_dir: Directory to copy the original patch file into before it
            is overwritten, or None for no backup
        fname: Filename to patch file to process
        series: Series information about this patch set
        commit: Commit object for this patch file
    Return:
        A list of warnings, or [] if all ok.
    """
    handle, tmpname = tempfile.mkstemp()
    replaced = False
    try:
        # Use context managers so both files are closed even if opening the
        # patch or processing it fails part-way through
        with os.fdopen(handle, 'w') as outfd:
            with open(fname, 'r') as infd:
                ps = PatchStream(series)
                ps.commit = commit
                ps.ProcessStream(infd, outfd)

        # Create a backup file if required
        if backup_dir:
            shutil.copy(fname,
                        os.path.join(backup_dir, os.path.basename(fname)))
        shutil.move(tmpname, fname)
        replaced = True
    finally:
        # Don't leave the temporary file lying around if anything went wrong
        if not replaced and os.path.exists(tmpname):
            os.remove(tmpname)
    return ps.warn
428
def FixPatches(series, fnames):
    """Fix up a list of patches identified by filenames

    The patch files are processed in place, and overwritten. Each file is
    paired with the commit at the same position in series.commits, and any
    warnings produced for it are printed.

    Args:
        series: The series object
        fnames: List of patch files to process

    Returns:
        The series object that was passed in
    """
    # Current workflow creates patches, so we shouldn't need a backup
    backup_dir = None  #tempfile.mkdtemp('clean-patch')
    count = 0
    for count, fname in enumerate(fnames, 1):
        commit = series.commits[count - 1]
        commit.patch = fname
        result = FixPatch(backup_dir, fname, series, commit)
        if result:
            # Single-argument print(...) gives the same output whether it is
            # parsed as a Python 2 statement or a Python 3 function call
            print('%d warnings for %s:' % (len(result), fname))
            for warn in result:
                print('\t%s' % warn)
            print('')
    print('Cleaned %d patches' % count)
    return series
453
def InsertCoverLetter(fname, series, count):
    """Inserts a cover letter with the required info into patch 0

    The 'Subject:' line is replaced with one built from the patch prefix,
    the patch count and the first line of the cover letter. The
    '*** BLURB HERE ***' line is replaced with the rest of the cover letter,
    the series notes (if any) and the change log.

    The new contents are built completely before the file is opened for
    writing, so if the series has no usable cover letter the exception is
    raised while the original file is still intact.

    Args:
        fname: Input / output filename of the cover letter file
        series: Series object
        count: Number of patches in the series
    """
    with open(fname, 'r') as fd:
        lines = fd.readlines()

    text = series.cover
    prefix = series.GetPatchPrefix()
    out_lines = []
    for line in lines:
        if line.startswith('Subject:'):
            # TODO: if more than 10 patches this should save 00/xx, not 0/xx
            line = 'Subject: [%s 0/%d] %s\n' % (prefix, count, text[0])

        # Insert our cover letter
        elif line.startswith('*** BLURB HERE ***'):
            # First the blurb text
            line = '\n'.join(text[1:]) + '\n'
            if series.get('notes'):
                line += '\n'.join(series.notes) + '\n'

            # Now the change list (no trailing newline is added; the line
            # which follows the blurb marker in the file provides it)
            changes = series.MakeChangeLog(None)
            line += '\n' + '\n'.join(changes)
        out_lines.append(line)

    with open(fname, 'w') as fd:
        fd.writelines(out_lines)
486