xref: /openbmc/u-boot/tools/patman/patchstream.py (revision baefb63a)
1# Copyright (c) 2011 The Chromium OS Authors.
2#
3# SPDX-License-Identifier:	GPL-2.0+
4#
5
6import math
7import os
8import re
9import shutil
10import tempfile
11
12import command
13import commit
14import gitutil
15from series import Series
16
# Regular expressions are written as raw strings so that regex escapes such
# as \w reach the 're' module untouched instead of being (mis)interpreted as
# Python string escapes.

# Tags that we detect and remove
re_remove = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
    r'|Reviewed-on:|Commit-\w*:')

# Lines which are allowed after a TEST= line
re_allowed_after_test = re.compile(r'^Signed-off-by:')

# Signoffs
re_signoff = re.compile(r'^Signed-off-by: *(.*)')

# The start of the cover letter
re_cover = re.compile(r'^Cover-letter:')

# A cover letter Cc
re_cover_cc = re.compile(r'^Cover-letter-cc: *(.*)')

# Patch series tag: group 1 is the tag name, group 2 is its value
re_series_tag = re.compile(r'^Series-([a-z-]*): *(.*)')

# Commit series tag: group 1 is the tag name, group 2 is its value
re_commit_tag = re.compile(r'^Commit-([a-z-]*): *(.*)')

# Commit tags that we want to collect and keep
re_tag = re.compile(r'^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)')

# The start of a new commit in the git log
re_commit = re.compile(r'^commit ([0-9a-f]*)$')

# We detect these since checkpatch doesn't always do it
re_space_before_tab = re.compile(r'^[+].* \t')

# States we can be in - can we use range() and still have comments?
STATE_MSG_HEADER = 0        # Still in the message header
STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
STATE_PATCH_HEADER = 2      # In patch header (after the subject)
STATE_DIFFS = 3             # In the diff part (past --- line)
53
class PatchStream:
    """Class for detecting/injecting tags in a patch or series of patches

    We support processing the output of 'git log' to read out the tags we
    are interested in. We can also process a patch file in order to remove
    unwanted tags or inject additional ones. These correspond to the two
    phases of processing.
    """
    def __init__(self, series, name=None, is_log=False):
        """Set up a new patch stream.

        Args:
            series: Series object which collects tags, notes, cover letter
                and commits found while processing
            name: Not used by this class; kept so existing callers still work
            is_log: True if the input is 'git log' output (indented by 4
                characters), False if it is a patch file
        """
        self.skip_blank = False          # True to skip a single blank line
        self.found_test = False          # Found a TEST= line
        self.lines_after_test = 0        # Number of lines found after TEST=
        self.warn = []                   # List of warnings we have collected
        self.linenum = 1                 # Output line number we are up to
        self.in_section = None           # Name of start...END section we are in
        self.notes = []                  # Series notes
        self.section = []                # The current section...END section
        self.series = series             # Info about the patch series
        self.is_log = is_log             # True if indent like git log
        self.in_change = 0               # Non-zero if we are in a change list
        self.blank_count = 0             # Number of blank lines stored up
        self.state = STATE_MSG_HEADER    # What state are we in?
        self.signoff = []                # Contents of signoff line
        self.commit = None               # Current commit

    def AddToSeries(self, line, name, value):
        """Add a new Series-xxx tag.

        When a Series-xxx tag is detected, we come here to record it, if we
        are scanning a 'git log'.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Series-')
            value: Tag value (part after 'Series-xxx: ')
        """
        if name == 'notes':
            # Series-notes starts a section which runs until 'END'
            self.in_section = name
            self.skip_blank = False
        if self.is_log:
            self.series.AddTag(self.commit, line, name, value)

    def AddToCommit(self, line, name, value):
        """Add a new Commit-xxx tag.

        When a Commit-xxx tag is detected, we come here to record it.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Commit-')
            value: Tag value (part after 'Commit-xxx: ')
        """
        if name == 'notes':
            # Commit-notes starts a section which runs until 'END'
            self.in_section = 'commit-' + name
            self.skip_blank = False

    def _CloseSection(self):
        """Store the lines of the current section and leave that section.

        The collected lines go to the series cover letter, the series notes
        or the current commit's notes depending on self.in_section. Notes are
        only recorded in 'git log' mode. An unrecognised section name adds a
        warning. Afterwards the section state is reset and the next blank
        line is skipped.
        """
        if self.in_section == 'cover':
            self.series.cover = self.section
        elif self.in_section == 'notes':
            if self.is_log:
                self.series.notes += self.section
        elif self.in_section == 'commit-notes':
            if self.is_log:
                self.commit.notes += self.section
        else:
            self.warn.append("Unknown section '%s'" % self.in_section)
        self.in_section = None
        self.skip_blank = True
        self.section = []

    def CloseCommit(self):
        """Save the current commit into our commit list, and reset our state"""
        if self.commit and self.is_log:
            self.series.AddCommit(self.commit)
            self.commit = None
        # If 'END' is missing in a 'Cover-letter' section, and that section
        # happens to show up at the very end of the commit message, this is
        # the chance for us to fix it up.
        if self.in_section == 'cover' and self.is_log:
            self._CloseSection()

    def ProcessLine(self, line):
        """Process a single line of a patch file or commit log

        This process a line and returns a list of lines to output. The list
        may be empty or may contain multiple output lines.

        This is where all the complicated logic is located. The class's
        state is used to move between different states and detect things
        properly.

        We can be in one of two modes:
            self.is_log == True: This is 'git log' mode, where most output is
                indented by 4 characters and we are scanning for tags

            self.is_log == False: This is 'patch' mode, where we already have
                all the tags, and are processing patches to remove junk we
                don't want, and add things we think are required.

        Args:
            line: text line to process

        Returns:
            list of output lines, or [] if nothing should be output

        Raises:
            ValueError: if a Series-changes tag has a non-integer version
        """
        # Initially we have no output. Prepare the input line string
        out = []
        line = line.rstrip('\n')

        # The 'commit <hash>' line is matched before the indent is removed,
        # so only an unindented line can start a new commit
        commit_match = re_commit.match(line) if self.is_log else None

        if self.is_log:
            if line[:4] == '    ':
                line = line[4:]

        # Handle state transition and skipping blank lines
        series_tag_match = re_series_tag.match(line)
        commit_tag_match = re_commit_tag.match(line)
        cover_match = re_cover.match(line)
        cover_cc_match = re_cover_cc.match(line)
        signoff_match = re_signoff.match(line)
        tag_match = None
        if self.state == STATE_PATCH_HEADER:
            tag_match = re_tag.match(line)
        is_blank = not line.strip()
        if is_blank:
            if (self.state == STATE_MSG_HEADER
                    or self.state == STATE_PATCH_SUBJECT):
                self.state += 1

            # We don't have a subject in the text stream of patch files
            # It has its own line with a Subject: tag
            if not self.is_log and self.state == STATE_PATCH_SUBJECT:
                self.state += 1
        elif commit_match:
            self.state = STATE_MSG_HEADER

        # If a tag is detected, or a new commit starts
        if series_tag_match or commit_tag_match or \
           cover_match or cover_cc_match or signoff_match or \
           self.state == STATE_MSG_HEADER:
            # but we are already in a section, this means 'END' is missing
            # for that section, fix it up.
            if self.in_section:
                self.warn.append("Missing 'END' in section '%s'" % self.in_section)
                self._CloseSection()
            # but we are already in a change list, that means a blank line
            # is missing, fix it up.
            if self.in_change:
                self.warn.append("Missing 'blank line' in section 'Series-changes'")
                self.in_change = 0

        # If we are in a section, keep collecting lines until we see END
        if self.in_section:
            if line == 'END':
                self._CloseSection()
            else:
                self.section.append(line)

        # Detect the commit subject
        elif not is_blank and self.state == STATE_PATCH_SUBJECT:
            self.commit.subject = line

        # Detect the tags we want to remove, and skip blank lines
        elif re_remove.match(line) and not commit_tag_match:
            self.skip_blank = True

            # TEST= should be the last thing in the commit, so remove
            # everything after it
            if line.startswith('TEST='):
                self.found_test = True
        elif self.skip_blank and is_blank:
            self.skip_blank = False

        # Detect the start of a cover letter section
        elif cover_match:
            self.in_section = 'cover'
            self.skip_blank = False

        elif cover_cc_match:
            value = cover_cc_match.group(1)
            self.AddToSeries(line, 'cover-cc', value)

        # If we are in a change list, keep collecting lines until a blank one
        elif self.in_change:
            if is_blank:
                # Blank line ends this change list
                self.in_change = 0
            elif line == '---':
                # The diff separator also ends the change list; process it
                # again now that we have left the list
                self.in_change = 0
                out = self.ProcessLine(line)
            else:
                if self.is_log:
                    self.series.AddChange(self.in_change, self.commit, line)
            self.skip_blank = False

        # Detect Series-xxx tags
        elif series_tag_match:
            name = series_tag_match.group(1)
            value = series_tag_match.group(2)
            if name == 'changes':
                # value is the version number: e.g. 1, or 2
                try:
                    value = int(value)
                except ValueError:
                    raise ValueError("%s: Cannot decode version info '%s'" %
                        (self.commit.hash, line))
                self.in_change = value
            else:
                self.AddToSeries(line, name, value)
                self.skip_blank = True

        # Detect Commit-xxx tags
        elif commit_tag_match:
            name = commit_tag_match.group(1)
            value = commit_tag_match.group(2)
            if name == 'notes':
                self.AddToCommit(line, name, value)
                self.skip_blank = True

        # Detect the start of a new commit
        elif commit_match:
            self.CloseCommit()
            self.commit = commit.Commit(commit_match.group(1))

        # Detect tags in the commit message
        elif tag_match:
            tag_name = tag_match.group(1)
            tag_value = tag_match.group(2)
            # USER may be unset or empty; in that case we cannot tell whether
            # the tag refers to ourselves, so the tag is kept
            user = os.getenv('USER')

            # Remove Tested-by self, since few will take much notice
            if (tag_name == 'Tested-by' and user and
                    tag_value.find(user + '@') != -1):
                self.warn.append("Ignoring %s" % line)
            elif tag_name == 'Patch-cc':
                self.commit.AddCc(tag_value.split(','))
            else:
                out = [line]

        # Suppress duplicate signoffs
        elif signoff_match:
            if (self.is_log or not self.commit or
                self.commit.CheckDuplicateSignoff(signoff_match.group(1))):
                out = [line]

        # Well that means this is an ordinary line
        else:
            # Look for space before tab
            m = re_space_before_tab.match(line)
            if m:
                self.warn.append('Line %d/%d has space before tab' %
                    (self.linenum, m.start()))

            # OK, we have a valid non-blank line
            out = [line]
            self.linenum += 1
            self.skip_blank = False
            if self.state == STATE_DIFFS:
                pass

            # If this is the start of the diffs section, emit our tags and
            # change log
            elif line == '---':
                self.state = STATE_DIFFS

                # Output the '---' line, then commit notes, then change list
                out = []
                log = self.series.MakeChangeLog(self.commit)
                out += [line]
                if self.commit:
                    out += self.commit.notes
                out += [''] + log
            elif self.found_test:
                if not re_allowed_after_test.match(line):
                    self.lines_after_test += 1

        return out

    def Finalize(self):
        """Close out processing of this patch stream"""
        self.CloseCommit()
        if self.lines_after_test:
            self.warn.append('Found %d lines after TEST=' %
                    self.lines_after_test)

    def ProcessStream(self, infd, outfd):
        """Copy a stream from infd to outfd, filtering out unwanted things.

        This is used to process patch files one at a time.

        Args:
            infd: Input stream file object
            outfd: Output stream file object
        """
        # Extract the filename from each diff, for nice warnings
        fname = None
        last_fname = None
        re_fname = re.compile(r'diff --git a/(.*) b/.*')
        while True:
            line = infd.readline()
            if not line:
                break
            out = self.ProcessLine(line)

            # Try to detect blank lines at EOF
            for out_line in out:
                match = re_fname.match(out_line)
                if match:
                    last_fname = fname
                    fname = match.group(1)
                if out_line == '+':
                    # Hold back added blank lines until we know what follows
                    self.blank_count += 1
                else:
                    if self.blank_count and (out_line == '-- ' or match):
                        self.warn.append("Found possible blank line(s) at "
                                "end of file '%s'" % last_fname)
                    outfd.write('+\n' * self.blank_count)
                    outfd.write(out_line + '\n')
                    self.blank_count = 0
        self.Finalize()
384
385
def GetMetaDataForList(commit_range, git_dir=None, count=None,
                       series=None, allow_overwrite=False):
    """Collect patch series metadata from a range of commits

    Runs the 'git log' command built by gitutil.LogCmd() for the requested
    commits and feeds every output line through a PatchStream in log mode,
    so that the tags found are recorded in the series.

    Args:
        commit_range: Range of commits to count (e.g. 'HEAD..base')
        git_dir: Path to git repository (None to use default)
        count: Number of commits to list, or None for no limit
        series: Series object to add information into. By default a new series
            is started.
        allow_overwrite: Allow tags to overwrite an existing tag
    Returns:
        A Series object containing information about the commits.
    """
    if not series:
        series = Series()
    series.allow_overwrite = allow_overwrite

    log_cmd = gitutil.LogCmd(commit_range, reverse=True, count=count,
                             git_dir=git_dir)
    log_text = command.RunPipe([log_cmd], capture=True).stdout

    stream = PatchStream(series, is_log=True)
    for log_line in log_text.splitlines():
        stream.ProcessLine(log_line)
    stream.Finalize()
    return series
414
def GetMetaData(start, count):
    """Collect patch series metadata for commits counted back from HEAD

    This is a convenience wrapper around GetMetaDataForList() which uses
    the default git directory and a new series.

    Args:
        start: Commit to start from: 0=HEAD, 1=next one, etc.
        count: Number of commits to list

    Returns:
        The Series object returned by GetMetaDataForList()
    """
    commit_range = 'HEAD~%d' % start
    return GetMetaDataForList(commit_range, git_dir=None, count=count)
426
def GetMetaDataForTest(text):
    """Process metadata from text containing a git log. Used for tests

    Args:
        text: String holding the log output; it is split into lines and each
            line is processed in 'git log' mode

    Returns:
        A new Series object filled in from the log text
    """
    result = Series()
    stream = PatchStream(result, is_log=True)
    for log_line in text.splitlines():
        stream.ProcessLine(log_line)
    stream.Finalize()
    return result
439
def FixPatch(backup_dir, fname, series, commit):
    """Fix up a patch file, by adding/removing as required.

    We remove our tags from the patch file, insert changes lists, etc.
    The patch file is processed in place, and overwritten.

    A backup file is put into backup_dir (if not None).

    Args:
        backup_dir: Directory to copy the original patch file into before it
            is overwritten, or None to skip the backup
        fname: Filename to patch file to process
        series: Series information about this patch set
        commit: Commit object for this patch file
    Return:
        A list of errors, or [] if all ok.
    """
    handle, tmpname = tempfile.mkstemp()
    try:
        # The 'with' block closes both files even if processing fails
        with os.fdopen(handle, 'w') as outfd, open(fname, 'r') as infd:
            ps = PatchStream(series)
            ps.commit = commit
            ps.ProcessStream(infd, outfd)
    except Exception:
        # Do not leave a half-written temporary file behind; the original
        # patch file has not been touched at this point
        if os.path.exists(tmpname):
            os.remove(tmpname)
        raise

    # Create a backup file if required
    if backup_dir:
        shutil.copy(fname, os.path.join(backup_dir, os.path.basename(fname)))
    shutil.move(tmpname, fname)
    return ps.warn
469
def FixPatches(series, fnames):
    """Fix up a list of patches identified by filenames

    The patch files are processed in place, and overwritten. Each file is
    paired with the commit at the same position in series.commits, and any
    warnings produced while fixing it are printed.

    Args:
        series: The series object
        fnames: List of patch files to process
    """
    # Current workflow creates patches, so we shouldn't need a backup
    backup_dir = None  #tempfile.mkdtemp('clean-patch')
    count = 0
    for count, fname in enumerate(fnames, start=1):
        commit = series.commits[count - 1]
        commit.patch = fname
        result = FixPatch(backup_dir, fname, series, commit)
        if result:
            print('%d warnings for %s:' % (len(result), fname))
            for warn in result:
                print('\t', warn)
            # Blank line after each group of warnings. This must be a call:
            # a bare 'print' is only an expression and outputs nothing.
            print()
    print('Cleaned %d patches' % count)
493
def InsertCoverLetter(fname, series, count):
    """Inserts a cover letter with the required info into patch 0

    The file is read completely and then rewritten. The 'Subject:' line is
    replaced with one built from the patch prefix, the patch count and the
    first line of the cover letter. The '*** BLURB HERE ***' line is replaced
    with the rest of the cover letter, the series notes (if any) and the
    change log.

    Args:
        fname: Input / output filename of the cover letter file
        series: Series object
        count: Number of patches in the series
    """
    with open(fname, 'r') as fd:
        lines = fd.readlines()

    text = series.cover
    prefix = series.GetPatchPrefix()

    # If more than 10 or 100 patches, it should say 00/xx, 000/xxx, etc.
    # Counting the digits of 'count' also copes with a count of 0, which
    # math.log10() cannot handle.
    zero = '0' * len(str(count))

    with open(fname, 'w') as fd:
        for line in lines:
            if line.startswith('Subject:'):
                line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count,
                                                     text[0])

            # Insert our cover letter
            elif line.startswith('*** BLURB HERE ***'):
                # First the blurb text
                line = '\n'.join(text[1:]) + '\n'
                if series.get('notes'):
                    line += '\n'.join(series.notes) + '\n'

                # Now the change list
                out = series.MakeChangeLog(None)
                line += '\n' + '\n'.join(out)
            fd.write(line)
528