xref: /openbmc/u-boot/tools/patman/patchstream.py (revision d9b88d25)
1# Copyright (c) 2011 The Chromium OS Authors.
2#
3# SPDX-License-Identifier:	GPL-2.0+
4#
5
6import math
7import os
8import re
9import shutil
10import tempfile
11
12import command
13import commit
14import gitutil
15from series import Series
16
# All patterns below are raw strings so that regex escapes such as \w and \t
# reach the re module untouched instead of relying on Python's handling of
# unknown string escapes.

# Tags that we detect and remove
re_remove = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
    r'|Reviewed-on:|Commit-\w*:')

# Lines which are allowed after a TEST= line
re_allowed_after_test = re.compile(r'^Signed-off-by:')

# Signoffs
re_signoff = re.compile(r'^Signed-off-by: *(.*)')

# The start of the cover letter
re_cover = re.compile(r'^Cover-letter:')

# A cover letter Cc
re_cover_cc = re.compile(r'^Cover-letter-cc: *(.*)')

# Patch series tag
re_series_tag = re.compile(r'^Series-([a-z-]*): *(.*)')

# Commit series tag
re_commit_tag = re.compile(r'^Commit-([a-z-]*): *(.*)')

# Commit tags that we want to collect and keep
re_tag = re.compile(r'^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)')

# The start of a new commit in the git log
re_commit = re.compile(r'^commit ([0-9a-f]*)$')

# We detect these since checkpatch doesn't always do it
re_space_before_tab = re.compile(r'^[+].* \t')

# States we can be in - can we use range() and still have comments?
STATE_MSG_HEADER = 0        # Still in the message header
STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
STATE_PATCH_HEADER = 2      # In patch header (after the subject)
STATE_DIFFS = 3             # In the diff part (past --- line)
53
class PatchStream:
    """Class for detecting/injecting tags in a patch or series of patches

    We support processing the output of 'git log' to read out the tags we
    are interested in. We can also process a patch file in order to remove
    unwanted tags or inject additional ones. These correspond to the two
    phases of processing.
    """
    def __init__(self, series, name=None, is_log=False):
        """Set up a new patch stream

        Args:
            series: Series object which collects/provides the series info
            name: Not used by this class; kept so existing callers still work
            is_log: True if the input is 'git log' output (indented by 4
                characters), False if it is a patch file
        """
        self.skip_blank = False          # True to skip a single blank line
        self.found_test = False          # Found a TEST= line
        self.lines_after_test = 0        # Number of lines found after TEST=
        self.warn = []                   # List of warnings we have collected
        self.linenum = 1                 # Output line number we are up to
        self.in_section = None           # Name of start...END section we are in
        self.notes = []                  # Series notes
        self.section = []                # The current section...END section
        self.series = series             # Info about the patch series
        self.is_log = is_log             # True if indent like git log
        self.in_change = 0               # Non-zero if we are in a change list
        self.blank_count = 0             # Number of blank lines stored up
        self.state = STATE_MSG_HEADER    # What state are we in?
        self.signoff = []                # Contents of signoff line
        self.commit = None               # Current commit

    def AddToSeries(self, line, name, value):
        """Add a new Series-xxx tag.

        When a Series-xxx tag is detected, we come here to record it, if we
        are scanning a 'git log'.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Series-')
            value: Tag value (part after 'Series-xxx: ')
        """
        if name == 'notes':
            # The following lines, up to END, belong to the notes section
            self.in_section = name
            self.skip_blank = False
        if self.is_log:
            self.series.AddTag(self.commit, line, name, value)

    def AddToCommit(self, line, name, value):
        """Add a new Commit-xxx tag.

        When a Commit-xxx tag is detected, we come here to record it. Only
        'notes' has any effect: it starts a 'commit-notes' section.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Commit-')
            value: Tag value (part after 'Commit-xxx: ')
        """
        if name == 'notes':
            self.in_section = 'commit-' + name
            self.skip_blank = False

    def CloseCommit(self):
        """Save the current commit into our commit list, and reset our state"""
        if self.commit and self.is_log:
            self.series.AddCommit(self.commit)
            self.commit = None
        # If 'END' is missing in a 'Cover-letter' section, and that section
        # happens to show up at the very end of the commit message, this is
        # the chance for us to fix it up.
        if self.in_section == 'cover' and self.is_log:
            self.series.cover = self.section
            self.in_section = None
            self.skip_blank = True
            self.section = []

    def _CloseSection(self):
        """Store the lines collected for the current section and leave it

        The collected lines go to the series cover letter, the series notes
        or the commit notes depending on the section name. Notes are only
        stored when scanning a 'git log'. An unrecognised section name adds
        a warning and its lines are dropped.
        """
        if self.in_section == 'cover':
            self.series.cover = self.section
        elif self.in_section == 'notes':
            if self.is_log:
                self.series.notes += self.section
        elif self.in_section == 'commit-notes':
            if self.is_log:
                self.commit.notes += self.section
        else:
            self.warn.append("Unknown section '%s'" % self.in_section)
        self.in_section = None
        self.skip_blank = True
        self.section = []

    def ProcessLine(self, line):
        """Process a single line of a patch file or commit log

        This process a line and returns a list of lines to output. The list
        may be empty or may contain multiple output lines.

        This is where all the complicated logic is located. The class's
        state is used to move between different states and detect things
        properly.

        We can be in one of two modes:
            self.is_log == True: This is 'git log' mode, where most output is
                indented by 4 characters and we are scanning for tags

            self.is_log == False: This is 'patch' mode, where we already have
                all the tags, and are processing patches to remove junk we
                don't want, and add things we think are required.

        Args:
            line: text line to process

        Returns:
            list of output lines, or [] if nothing should be output

        Raises:
            ValueError: if a 'Series-changes' tag has a non-integer version
        """
        # Initially we have no output. Prepare the input line string
        out = []
        line = line.rstrip('\n')

        # The 'commit <hash>' line is not indented, so check for it before
        # the git-log indent is removed
        commit_match = re_commit.match(line) if self.is_log else None

        if self.is_log:
            if line[:4] == '    ':
                line = line[4:]

        # Handle state transition and skipping blank lines
        series_tag_match = re_series_tag.match(line)
        commit_tag_match = re_commit_tag.match(line)
        cover_match = re_cover.match(line)
        cover_cc_match = re_cover_cc.match(line)
        signoff_match = re_signoff.match(line)
        tag_match = None
        if self.state == STATE_PATCH_HEADER:
            tag_match = re_tag.match(line)
        is_blank = not line.strip()
        if is_blank:
            if (self.state == STATE_MSG_HEADER
                    or self.state == STATE_PATCH_SUBJECT):
                self.state += 1

            # We don't have a subject in the text stream of patch files
            # It has its own line with a Subject: tag
            if not self.is_log and self.state == STATE_PATCH_SUBJECT:
                self.state += 1
        elif commit_match:
            self.state = STATE_MSG_HEADER

        # If a tag is detected, or a new commit starts
        if (series_tag_match or commit_tag_match or
                cover_match or cover_cc_match or signoff_match or
                self.state == STATE_MSG_HEADER):
            # but we are already in a section, this means 'END' is missing
            # for that section, fix it up.
            if self.in_section:
                self.warn.append("Missing 'END' in section '%s'" %
                                 self.in_section)
                self._CloseSection()
            # but we are already in a change list, that means a blank line
            # is missing, fix it up.
            if self.in_change:
                self.warn.append(
                    "Missing 'blank line' in section 'Series-changes'")
                self.in_change = 0

        # If we are in a section, keep collecting lines until we see END
        if self.in_section:
            if line == 'END':
                self._CloseSection()
            else:
                self.section.append(line)

        # Detect the commit subject
        elif not is_blank and self.state == STATE_PATCH_SUBJECT:
            self.commit.subject = line

        # Detect the tags we want to remove, and skip blank lines
        elif re_remove.match(line) and not commit_tag_match:
            self.skip_blank = True

            # TEST= should be the last thing in the commit, so remove
            # everything after it
            if line.startswith('TEST='):
                self.found_test = True
        elif self.skip_blank and is_blank:
            self.skip_blank = False

        # Detect the start of a cover letter section
        elif cover_match:
            self.in_section = 'cover'
            self.skip_blank = False

        elif cover_cc_match:
            value = cover_cc_match.group(1)
            self.AddToSeries(line, 'cover-cc', value)

        # If we are in a change list, keep collecting lines until a blank one
        elif self.in_change:
            if is_blank:
                # Blank line ends this change list
                self.in_change = 0
            elif line == '---':
                # The diff separator also ends the change list; process it
                # again now that we are no longer in the list
                self.in_change = 0
                out = self.ProcessLine(line)
            else:
                if self.is_log:
                    self.series.AddChange(self.in_change, self.commit, line)
            self.skip_blank = False

        # Detect Series-xxx tags
        elif series_tag_match:
            name = series_tag_match.group(1)
            value = series_tag_match.group(2)
            if name == 'changes':
                # value is the version number: e.g. 1, or 2
                try:
                    value = int(value)
                except ValueError:
                    raise ValueError("%s: Cannot decode version info '%s'" %
                        (self.commit.hash, line))
                self.in_change = value
            else:
                self.AddToSeries(line, name, value)
                self.skip_blank = True

        # Detect Commit-xxx tags
        elif commit_tag_match:
            name = commit_tag_match.group(1)
            value = commit_tag_match.group(2)
            if name == 'notes':
                self.AddToCommit(line, name, value)
                self.skip_blank = True

        # Detect the start of a new commit
        elif commit_match:
            self.CloseCommit()
            self.commit = commit.Commit(commit_match.group(1))

        # Detect tags in the commit message
        elif tag_match:
            # Remove Tested-by self, since few will take much notice. If
            # USER is not set we cannot tell who 'self' is, so keep the tag.
            user = os.getenv('USER')
            if (tag_match.group(1) == 'Tested-by' and user and
                    tag_match.group(2).find(user + '@') != -1):
                self.warn.append("Ignoring %s" % line)
            elif tag_match.group(1) == 'Patch-cc':
                self.commit.AddCc(tag_match.group(2).split(','))
            else:
                out = [line]

        # Suppress duplicate signoffs
        elif signoff_match:
            if (self.is_log or not self.commit or
                self.commit.CheckDuplicateSignoff(signoff_match.group(1))):
                out = [line]

        # Well that means this is an ordinary line
        else:
            # Look for characters outside the 7-bit ASCII range
            for pos, ch in enumerate(line, 1):
                # TODO: Would be nicer to report source filename and line
                if ord(ch) > 0x7f:
                    self.warn.append("Line %d/%d ('%s') has funny ascii char" %
                        (self.linenum, pos, line))

            # Look for space before tab
            # NOTE: the pattern is anchored at the start of the line, so the
            # column reported by m.start() is always 0
            m = re_space_before_tab.match(line)
            if m:
                self.warn.append('Line %d/%d has space before tab' %
                    (self.linenum, m.start()))

            # OK, we have a valid non-blank line
            out = [line]
            self.linenum += 1
            self.skip_blank = False
            if self.state == STATE_DIFFS:
                pass

            # If this is the start of the diffs section, emit our tags and
            # change log
            elif line == '---':
                self.state = STATE_DIFFS

                # Output the '---' line, then commit notes, then change list
                out = []
                log = self.series.MakeChangeLog(self.commit)
                out += [line]
                if self.commit:
                    out += self.commit.notes
                out += [''] + log
            elif self.found_test:
                if not re_allowed_after_test.match(line):
                    self.lines_after_test += 1

        return out

    def Finalize(self):
        """Close out processing of this patch stream"""
        self.CloseCommit()
        if self.lines_after_test:
            self.warn.append('Found %d lines after TEST=' %
                    self.lines_after_test)

    def ProcessStream(self, infd, outfd):
        """Copy a stream from infd to outfd, filtering out unwanted things.

        This is used to process patch files one at a time. Each input line
        is passed through ProcessLine() and the resulting lines are written
        to outfd. Added blank lines ('+') are held back until the next
        non-blank output line, so that blank lines at the end of a file can
        be reported as a warning.

        Args:
            infd: Input stream file object
            outfd: Output stream file object
        """
        # Extract the filename from each diff, for nice warnings
        fname = None
        re_fname = re.compile('diff --git a/(.*) b/.*')
        while True:
            line = infd.readline()
            if not line:
                break
            out = self.ProcessLine(line)

            # Try to detect blank lines at EOF
            for out_line in out:
                match = re_fname.match(out_line)

                # Any blank lines held back so far belong to the file we
                # were in before this line, even if this line starts the
                # diff for a new file
                blank_fname = fname
                if match:
                    fname = match.group(1)
                if out_line == '+':
                    self.blank_count += 1
                else:
                    if self.blank_count and (out_line == '-- ' or match):
                        self.warn.append("Found possible blank line(s) at "
                                "end of file '%s'" % blank_fname)
                    outfd.write('+\n' * self.blank_count)
                    outfd.write(out_line + '\n')
                    self.blank_count = 0
        self.Finalize()
393
394
def GetMetaDataForList(commit_range, git_dir=None, count=None,
                       series=None, allow_overwrite=False):
    """Collect patch series metadata from a range of commits

    Runs 'git log' over the requested commits and feeds every line of the
    output through a PatchStream in log mode, so that the tags found there
    end up in the series.

    Args:
        commit_range: Range of commits to count (e.g. 'HEAD..base')
        git_dir: Path to git repository (None to use default)
        count: Number of commits to list, or None for no limit
        series: Series object to add information into. By default a new series
            is started.
        allow_overwrite: Allow tags to overwrite an existing tag
    Returns:
        A Series object containing information about the commits.
    """
    series = series or Series()
    series.allow_overwrite = allow_overwrite

    log_cmd = gitutil.LogCmd(commit_range, reverse=True, count=count,
                             git_dir=git_dir)
    log_output = command.RunPipe([log_cmd], capture=True).stdout

    stream = PatchStream(series, is_log=True)
    for log_line in log_output.splitlines():
        stream.ProcessLine(log_line)
    stream.Finalize()
    return series
423
def GetMetaData(start, count):
    """Collect patch series metadata for commits counted back from HEAD

    This is a convenience wrapper around GetMetaDataForList() which uses the
    default git directory and builds the commit range from an offset.

    Args:
        start: Commit to start from: 0=HEAD, 1=next one, etc.
        count: Number of commits to list
    Returns:
        Whatever GetMetaDataForList() returns for that range
    """
    commit_range = 'HEAD~%d' % start
    return GetMetaDataForList(commit_range, git_dir=None, count=count)
435
def FixPatch(backup_dir, fname, series, commit):
    """Fix up a patch file, by adding/removing as required.

    We remove our tags from the patch file, insert changes lists, etc.
    The patch file is processed in place, and overwritten.

    The new contents are first written to a temporary file, which replaces
    the patch file only once processing has completed. If processing fails
    the temporary file is removed and the patch file is left untouched.

    A backup file is put into backup_dir (if not None).

    Args:
        backup_dir: Directory to copy the original patch file into, or None
            for no backup
        fname: Filename to patch file to process
        series: Series information about this patch set
        commit: Commit object for this patch file
    Return:
        A list of warnings, or [] if all ok.
    """
    handle, tmpname = tempfile.mkstemp()
    try:
        # The context managers close both files even if processing raises
        with os.fdopen(handle, 'w') as outfd, open(fname, 'r') as infd:
            ps = PatchStream(series)
            ps.commit = commit
            ps.ProcessStream(infd, outfd)
    except BaseException:
        # Don't leave a stray temporary file behind on failure
        os.remove(tmpname)
        raise

    # Create a backup file if required
    if backup_dir:
        shutil.copy(fname, os.path.join(backup_dir, os.path.basename(fname)))
    shutil.move(tmpname, fname)
    return ps.warn
465
def FixPatches(series, fnames):
    """Fix up a list of patches identified by filenames

    The patch files are processed in place, and overwritten. The n'th
    filename is matched with the n'th commit in series.commits, and any
    warnings produced for a patch are printed.

    Args:
        series: The series object
        fnames: List of patch files to process
    Returns:
        The series object passed in
    """
    # Current workflow creates patches, so we shouldn't need a backup
    backup_dir = None  #tempfile.mkdtemp('clean-patch')
    count = 0
    for index, fname in enumerate(fnames):
        # Named 'cmt' so that the imported 'commit' module is not shadowed
        cmt = series.commits[index]
        cmt.patch = fname
        result = FixPatch(backup_dir, fname, series, cmt)
        if result:
            print('%d warnings for %s:' % (len(result), fname))
            for warn in result:
                print('\t', warn)
            # Blank line after each group of warnings. A bare 'print' is
            # only an expression in Python 3 and outputs nothing.
            print('')
        count = index + 1
    print('Cleaned %d patches' % count)
    return series
490
def InsertCoverLetter(fname, series, count):
    """Inserts a cover letter with the required info into patch 0

    The 'Subject:' line is replaced with one built from the series prefix,
    the patch count and the first line of the cover letter. The
    '*** BLURB HERE ***' line is replaced with the rest of the cover letter,
    the series notes (if any) and the change log.

    The new contents are built completely before the file is rewritten, so
    a failure while building them leaves the file as it was.

    Args:
        fname: Input / output filename of the cover letter file
        series: Series object
        count: Number of patches in the series
    """
    with open(fname, 'r') as fd:
        lines = fd.readlines()

    text = series.cover
    prefix = series.GetPatchPrefix()
    out_lines = []
    for line in lines:
        if line.startswith('Subject:'):
            # if more than 10 or 100 patches, it should say 00/xx, 000/xxx, etc
            # Use as many zeros as count has digits (also works for 0)
            zero = '0' * len(str(count))
            line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0])

        # Insert our cover letter
        elif line.startswith('*** BLURB HERE ***'):
            # First the blurb text
            line = '\n'.join(text[1:]) + '\n'
            if series.get('notes'):
                line += '\n'.join(series.notes) + '\n'

            # Now the change list
            changes = series.MakeChangeLog(None)
            line += '\n' + '\n'.join(changes)
        out_lines.append(line)

    # Only now replace the file contents
    with open(fname, 'w') as fd:
        fd.writelines(out_lines)
525