xref: /openbmc/u-boot/tools/patman/patchstream.py (revision dd1033e4)
1# SPDX-License-Identifier: GPL-2.0+
2# Copyright (c) 2011 The Chromium OS Authors.
3#
4
5import math
6import os
7import re
8import shutil
9import tempfile
10
11import command
12import commit
13import gitutil
14from series import Series
15
# Tags that we detect and remove
# (raw strings so that '\w' reaches the regex engine instead of being an
# invalid string escape)
re_remove = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
    r'|Reviewed-on:|Commit-\w*:')

# Lines which are allowed after a TEST= line
re_allowed_after_test = re.compile('^Signed-off-by:')

# Signoffs
re_signoff = re.compile('^Signed-off-by: *(.*)')

# The start of the cover letter
re_cover = re.compile('^Cover-letter:')

# A cover letter Cc
re_cover_cc = re.compile('^Cover-letter-cc: *(.*)')

# Patch series tag
re_series_tag = re.compile('^Series-([a-z-]*): *(.*)')

# Commit series tag
re_commit_tag = re.compile('^Commit-([a-z-]*): *(.*)')

# Commit tags that we want to collect and keep
re_tag = re.compile('^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)')

# The start of a new commit in the git log
re_commit = re.compile('^commit ([0-9a-f]*)$')

# We detect these since checkpatch doesn't always do it
re_space_before_tab = re.compile('^[+].* \t')

# States we can be in - can we use range() and still have comments?
STATE_MSG_HEADER = 0        # Still in the message header
STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
STATE_PATCH_HEADER = 2      # In patch header (after the subject)
STATE_DIFFS = 3             # In the diff part (past --- line)
52
class PatchStream:
    """Class for detecting/injecting tags in a patch or series of patches

    We support processing the output of 'git log' to read out the tags we
    are interested in. We can also process a patch file in order to remove
    unwanted tags or inject additional ones. These correspond to the two
    phases of processing.
    """
    def __init__(self, series, name=None, is_log=False):
        """Set up a new stream

        Args:
            series: Series object which receives the tags, changes and
                commits that are found
            name: Not used by this class; kept so existing callers work
            is_log: True if the input is 'git log' output (indented by 4
                characters), False if it is a patch file
        """
        self.skip_blank = False          # True to skip a single blank line
        self.found_test = False          # Found a TEST= line
        self.lines_after_test = 0        # Number of lines found after TEST=
        self.warn = []                   # List of warnings we have collected
        self.linenum = 1                 # Output line number we are up to
        self.in_section = None           # Name of start...END section we are in
        self.notes = []                  # Series notes
        self.section = []                # The current section...END section
        self.series = series             # Info about the patch series
        self.is_log = is_log             # True if indent like git log
        self.in_change = 0               # Non-zero if we are in a change list
        self.blank_count = 0             # Number of blank lines stored up
        self.state = STATE_MSG_HEADER    # What state are we in?
        self.signoff = []                # Contents of signoff line
        self.commit = None               # Current commit

    def AddToSeries(self, line, name, value):
        """Add a new Series-xxx tag.

        When a Series-xxx tag is detected, we come here to record it, if we
        are scanning a 'git log'.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Series-')
            value: Tag value (part after 'Series-xxx: ')
        """
        if name == 'notes':
            # The following lines, up to 'END', belong to the notes section
            self.in_section = name
            self.skip_blank = False
        if self.is_log:
            self.series.AddTag(self.commit, line, name, value)

    def AddToCommit(self, line, name, value):
        """Add a new Commit-xxx tag.

        When a Commit-xxx tag is detected, we come here to record it.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Commit-')
            value: Tag value (part after 'Commit-xxx: ')
        """
        if name == 'notes':
            # The following lines, up to 'END', are notes for this commit
            self.in_section = 'commit-' + name
            self.skip_blank = False

    def CloseCommit(self):
        """Save the current commit into our commit list, and reset our state"""
        if self.commit and self.is_log:
            self.series.AddCommit(self.commit)
            self.commit = None
        # If 'END' is missing in a 'Cover-letter' section, and that section
        # happens to show up at the very end of the commit message, this is
        # the chance for us to fix it up.
        if self.in_section == 'cover' and self.is_log:
            self.series.cover = self.section
            self.in_section = None
            self.skip_blank = True
            self.section = []

    def _FinishSection(self):
        """Store the lines collected for the current section and leave it

        The cover letter is always stored in the series; series notes and
        commit notes are only stored when scanning a 'git log'. An
        unrecognised section name just produces a warning.
        """
        if self.in_section == 'cover':
            self.series.cover = self.section
        elif self.in_section == 'notes':
            if self.is_log:
                self.series.notes += self.section
        elif self.in_section == 'commit-notes':
            if self.is_log:
                self.commit.notes += self.section
        else:
            self.warn.append("Unknown section '%s'" % self.in_section)
        self.in_section = None
        self.skip_blank = True
        self.section = []

    def ProcessLine(self, line):
        """Process a single line of a patch file or commit log

        This process a line and returns a list of lines to output. The list
        may be empty or may contain multiple output lines.

        This is where all the complicated logic is located. The class's
        state is used to move between different states and detect things
        properly.

        We can be in one of two modes:
            self.is_log == True: This is 'git log' mode, where most output is
                indented by 4 characters and we are scanning for tags

            self.is_log == False: This is 'patch' mode, where we already have
                all the tags, and are processing patches to remove junk we
                don't want, and add things we think are required.

        Args:
            line: text line to process

        Returns:
            list of output lines, or [] if nothing should be output

        Raises:
            ValueError: a 'Series-changes:' tag has a version which is not
                an integer
        """
        # Initially we have no output. Prepare the input line string
        out = []
        line = line.rstrip('\n')

        # The 'commit <hash>' line is matched against the raw line, before
        # the 'git log' indent is removed below
        commit_match = re_commit.match(line) if self.is_log else None

        if self.is_log and line[:4] == '    ':
            line = line[4:]

        # Handle state transition and skipping blank lines
        series_tag_match = re_series_tag.match(line)
        commit_tag_match = re_commit_tag.match(line)
        cover_match = re_cover.match(line)
        cover_cc_match = re_cover_cc.match(line)
        signoff_match = re_signoff.match(line)
        tag_match = None
        if self.state == STATE_PATCH_HEADER:
            tag_match = re_tag.match(line)
        is_blank = not line.strip()
        if is_blank:
            if (self.state == STATE_MSG_HEADER
                    or self.state == STATE_PATCH_SUBJECT):
                self.state += 1

            # We don't have a subject in the text stream of patch files
            # It has its own line with a Subject: tag
            if not self.is_log and self.state == STATE_PATCH_SUBJECT:
                self.state += 1
        elif commit_match:
            self.state = STATE_MSG_HEADER

        # If a tag is detected, or a new commit starts
        if (series_tag_match or commit_tag_match or cover_match or
                cover_cc_match or signoff_match or
                self.state == STATE_MSG_HEADER):
            # but we are already in a section, this means 'END' is missing
            # for that section, fix it up.
            if self.in_section:
                self.warn.append("Missing 'END' in section '%s'" % self.in_section)
                self._FinishSection()
            # but we are already in a change list, that means a blank line
            # is missing, fix it up.
            if self.in_change:
                self.warn.append("Missing 'blank line' in section 'Series-changes'")
                self.in_change = 0

        # If we are in a section, keep collecting lines until we see END
        if self.in_section:
            if line == 'END':
                self._FinishSection()
            else:
                self.section.append(line)

        # Detect the commit subject
        elif not is_blank and self.state == STATE_PATCH_SUBJECT:
            self.commit.subject = line

        # Detect the tags we want to remove, and skip blank lines
        elif re_remove.match(line) and not commit_tag_match:
            self.skip_blank = True

            # TEST= should be the last thing in the commit, so remove
            # everything after it
            if line.startswith('TEST='):
                self.found_test = True
        elif self.skip_blank and is_blank:
            self.skip_blank = False

        # Detect the start of a cover letter section
        elif cover_match:
            self.in_section = 'cover'
            self.skip_blank = False

        elif cover_cc_match:
            value = cover_cc_match.group(1)
            self.AddToSeries(line, 'cover-cc', value)

        # If we are in a change list, keep collecting lines until a blank one
        elif self.in_change:
            if is_blank:
                # Blank line ends this change list
                self.in_change = 0
            elif line == '---':
                # The diff starts here: leave the change list and handle
                # the '---' line as an ordinary line
                self.in_change = 0
                out = self.ProcessLine(line)
            else:
                if self.is_log:
                    self.series.AddChange(self.in_change, self.commit, line)
            self.skip_blank = False

        # Detect Series-xxx tags
        elif series_tag_match:
            name = series_tag_match.group(1)
            value = series_tag_match.group(2)
            if name == 'changes':
                # value is the version number: e.g. 1, or 2
                try:
                    value = int(value)
                except ValueError:
                    raise ValueError("%s: Cannot decode version info '%s'" %
                        (self.commit.hash, line))
                self.in_change = value
            else:
                self.AddToSeries(line, name, value)
                self.skip_blank = True

        # Detect Commit-xxx tags
        elif commit_tag_match:
            name = commit_tag_match.group(1)
            value = commit_tag_match.group(2)
            if name == 'notes':
                self.AddToCommit(line, name, value)
                self.skip_blank = True

        # Detect the start of a new commit
        elif commit_match:
            self.CloseCommit()
            self.commit = commit.Commit(commit_match.group(1))

        # Detect tags in the commit message
        elif tag_match:
            # Remove Tested-by self, since few will take much notice. USER
            # may be unset or empty, in which case nothing is treated as
            # 'self'.
            user = os.getenv('USER')
            if (tag_match.group(1) == 'Tested-by' and user and
                    (user + '@') in tag_match.group(2)):
                self.warn.append("Ignoring %s" % line)
            elif tag_match.group(1) == 'Patch-cc':
                self.commit.AddCc(tag_match.group(2).split(','))
            else:
                out = [line]

        # Suppress duplicate signoffs
        elif signoff_match:
            if (self.is_log or not self.commit or
                    self.commit.CheckDuplicateSignoff(signoff_match.group(1))):
                out = [line]

        # Well that means this is an ordinary line
        else:
            # Look for space before tab
            # NOTE: m.start() is always 0 for a match() result, so the
            # second number in the warning is not a column position
            m = re_space_before_tab.match(line)
            if m:
                self.warn.append('Line %d/%d has space before tab' %
                    (self.linenum, m.start()))

            # OK, we have a line to pass through
            out = [line]
            self.linenum += 1
            self.skip_blank = False
            if self.state == STATE_DIFFS:
                pass

            # If this is the start of the diffs section, emit our tags and
            # change log
            elif line == '---':
                self.state = STATE_DIFFS

                # Output the '---' line, the commit notes, then change list
                out = []
                log = self.series.MakeChangeLog(self.commit)
                out += [line]
                if self.commit:
                    out += self.commit.notes
                out += [''] + log
            elif self.found_test:
                if not re_allowed_after_test.match(line):
                    self.lines_after_test += 1

        return out

    def Finalize(self):
        """Close out processing of this patch stream"""
        self.CloseCommit()
        if self.lines_after_test:
            self.warn.append('Found %d lines after TEST=' %
                    self.lines_after_test)

    def ProcessStream(self, infd, outfd):
        """Copy a stream from infd to outfd, filtering out unwanted things.

        This is used to process patch files one at a time.

        Lines consisting of just '+' are held back and counted, so that a
        warning can be given if they turn out to be at the end of a file's
        diff. They are always written out again before the next line.

        Args:
            infd: Input stream file object
            outfd: Output stream file object
        """
        # Extract the filename from each diff, for nice warnings
        fname = None
        last_fname = None
        re_fname = re.compile('diff --git a/(.*) b/.*')
        for in_line in infd:
            out = self.ProcessLine(in_line)

            # Try to detect blank lines at EOF
            for line in out:
                match = re_fname.match(line)
                if match:
                    last_fname = fname
                    fname = match.group(1)
                if line == '+':
                    self.blank_count += 1
                else:
                    if self.blank_count and (line == '-- ' or match):
                        # At a new 'diff' header the blank lines belong to
                        # the previous file; at the '-- ' signature they
                        # belong to the file we are still in
                        blank_fname = last_fname if match else fname
                        self.warn.append("Found possible blank line(s) at "
                                "end of file '%s'" % blank_fname)
                    outfd.write('+\n' * self.blank_count)
                    outfd.write(line + '\n')
                    self.blank_count = 0

        # Don't lose '+' lines that were still held back when input ended
        if self.blank_count:
            outfd.write('+\n' * self.blank_count)
            self.blank_count = 0
        self.Finalize()
383
384
def GetMetaDataForList(commit_range, git_dir=None, count=None,
                       series=None, allow_overwrite=False):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and pulls out the tags we
    are interested in.

    Args:
        commit_range: Range of commits to count (e.g. 'HEAD..base')
        git_dir: Path to git repository (None to use default)
        count: Number of commits to list, or None for no limit
        series: Series object to add information into. By default a new series
            is started.
        allow_overwrite: Allow tags to overwrite an existing tag
    Returns:
        A Series object containing information about the commits.
    """
    # Only start a new series when none was supplied: a series which was
    # passed in is used even if it currently evaluates as false
    if series is None:
        series = Series()
    series.allow_overwrite = allow_overwrite
    params = gitutil.LogCmd(commit_range, reverse=True, count=count,
                            git_dir=git_dir)
    stdout = command.RunPipe([params], capture=True).stdout

    # Feed the log through a stream in 'git log' mode, which records the
    # tags and commits it finds in the series
    ps = PatchStream(series, is_log=True)
    for line in stdout.splitlines():
        ps.ProcessLine(line)
    ps.Finalize()
    return series
413
def GetMetaData(start, count):
    """Collect patch series metadata for commits counted back from HEAD

    The commit range 'HEAD~<start>' is handed to GetMetaDataForList(), using
    the default git directory.

    Args:
        start: Commit to start from: 0=HEAD, 1=next one, etc.
        count: Number of commits to list

    Returns:
        Whatever GetMetaDataForList() returns for that range
    """
    commit_range = 'HEAD~%d' % start
    return GetMetaDataForList(commit_range, None, count)
425
def GetMetaDataForTest(text):
    """Process metadata from text containing a git log. Used for tests

    Args:
        text: String holding the log; it is split into lines and each one is
            processed in 'git log' mode

    Returns:
        The new Series object which was filled in while processing the text
    """
    result = Series()
    stream = PatchStream(result, is_log=True)
    for log_line in text.splitlines():
        stream.ProcessLine(log_line)
    stream.Finalize()
    return result
438
def FixPatch(backup_dir, fname, series, commit):
    """Fix up a patch file, by adding/removing as required.

    We remove our tags from the patch file, insert changes lists, etc.
    The patch file is processed in place, and overwritten.

    A backup file is put into backup_dir (if not None).

    Args:
        backup_dir: Directory to copy the original patch file into, or
            None (or empty) for no backup
        fname: Filename to patch file to process
        series: Series information about this patch set
        commit: Commit object for this patch file
    Return:
        A list of errors, or [] if all ok.
    """
    handle, tmpname = tempfile.mkstemp()

    # The 'with' makes sure both files are closed even if processing fails
    with os.fdopen(handle, 'w') as outfd, open(fname, 'r') as infd:
        ps = PatchStream(series)
        ps.commit = commit
        ps.ProcessStream(infd, outfd)

    # Create a backup file if required
    if backup_dir:
        shutil.copy(fname, os.path.join(backup_dir, os.path.basename(fname)))

    # Replace the original patch with the processed version
    shutil.move(tmpname, fname)
    return ps.warn
468
def FixPatches(series, fnames):
    """Fix up a list of patches identified by filenames

    The patch files are processed in place, and overwritten. Any warnings
    produced for a patch are printed, followed by a blank line.

    Args:
        series: The series object; series.commits[n] is used as the commit
            for the n'th filename
        fnames: List of patch files to process
    """
    # Current workflow creates patches, so we shouldn't need a backup
    backup_dir = None  #tempfile.mkdtemp('clean-patch')
    count = 0
    for count, fname in enumerate(fnames, 1):
        commit = series.commits[count - 1]
        commit.patch = fname
        result = FixPatch(backup_dir, fname, series, commit)
        if result:
            print('%d warnings for %s:' % (len(result), fname))
            for warn in result:
                print('\t', warn)
            # Blank line after the warnings (a bare 'print' does nothing
            # when print is a function)
            print('')
    print('Cleaned %d patches' % count)
492
def InsertCoverLetter(fname, series, count):
    """Inserts a cover letter with the required info into patch 0

    The 'Subject:' line is rewritten using the first line of the cover
    letter, and the '*** BLURB HERE ***' line is replaced by the rest of
    the cover letter, the series notes (if any) and the change log. All
    other lines are kept as they are.

    Args:
        fname: Input / output filename of the cover letter file
        series: Series object
        count: Number of patches in the series
    """
    with open(fname, 'r') as fd:
        lines = fd.readlines()

    text = series.cover
    prefix = series.GetPatchPrefix()

    # Build the whole output first, so that the file is not left truncated
    # if something goes wrong while generating it
    out_lines = []
    for line in lines:
        if line.startswith('Subject:'):
            # if more than 10 or 100 patches, it should say 00/xx, 000/xxx, etc
            # (one zero for each digit of the patch count)
            zero = '0' * len('%d' % count)
            line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0])

        # Insert our cover letter
        elif line.startswith('*** BLURB HERE ***'):
            # First the blurb text
            line = '\n'.join(text[1:]) + '\n'
            if series.get('notes'):
                line += '\n'.join(series.notes) + '\n'

            # Now the change list
            out = series.MakeChangeLog(None)
            line += '\n' + '\n'.join(out)
        out_lines.append(line)

    with open(fname, 'w') as fd:
        fd.writelines(out_lines)
527