1# Copyright (c) 2011 The Chromium OS Authors. 2# 3# SPDX-License-Identifier: GPL-2.0+ 4# 5 6import math 7import os 8import re 9import shutil 10import tempfile 11 12import command 13import commit 14import gitutil 15from series import Series 16 17# Tags that we detect and remove 18re_remove = re.compile('^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:' 19 '|Reviewed-on:|Commit-\w*:') 20 21# Lines which are allowed after a TEST= line 22re_allowed_after_test = re.compile('^Signed-off-by:') 23 24# Signoffs 25re_signoff = re.compile('^Signed-off-by: *(.*)') 26 27# The start of the cover letter 28re_cover = re.compile('^Cover-letter:') 29 30# A cover letter Cc 31re_cover_cc = re.compile('^Cover-letter-cc: *(.*)') 32 33# Patch series tag 34re_series_tag = re.compile('^Series-([a-z-]*): *(.*)') 35 36# Commit series tag 37re_commit_tag = re.compile('^Commit-([a-z-]*): *(.*)') 38 39# Commit tags that we want to collect and keep 40re_tag = re.compile('^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)') 41 42# The start of a new commit in the git log 43re_commit = re.compile('^commit ([0-9a-f]*)$') 44 45# We detect these since checkpatch doesn't always do it 46re_space_before_tab = re.compile('^[+].* \t') 47 48# States we can be in - can we use range() and still have comments? 49STATE_MSG_HEADER = 0 # Still in the message header 50STATE_PATCH_SUBJECT = 1 # In patch subject (first line of log for a commit) 51STATE_PATCH_HEADER = 2 # In patch header (after the subject) 52STATE_DIFFS = 3 # In the diff part (past --- line) 53 54class PatchStream: 55 """Class for detecting/injecting tags in a patch or series of patches 56 57 We support processing the output of 'git log' to read out the tags we 58 are interested in. We can also process a patch file in order to remove 59 unwanted tags or inject additional ones. These correspond to the two 60 phases of processing. 61 """ 62 def __init__(self, series, name=None, is_log=False): 63 self.skip_blank = False # True to skip a single blank line 64 self.found_test = False # Found a TEST= line 65 self.lines_after_test = 0 # MNumber of lines found after TEST= 66 self.warn = [] # List of warnings we have collected 67 self.linenum = 1 # Output line number we are up to 68 self.in_section = None # Name of start...END section we are in 69 self.notes = [] # Series notes 70 self.section = [] # The current section...END section 71 self.series = series # Info about the patch series 72 self.is_log = is_log # True if indent like git log 73 self.in_change = 0 # Non-zero if we are in a change list 74 self.blank_count = 0 # Number of blank lines stored up 75 self.state = STATE_MSG_HEADER # What state are we in? 76 self.signoff = [] # Contents of signoff line 77 self.commit = None # Current commit 78 79 def AddToSeries(self, line, name, value): 80 """Add a new Series-xxx tag. 81 82 When a Series-xxx tag is detected, we come here to record it, if we 83 are scanning a 'git log'. 84 85 Args: 86 line: Source line containing tag (useful for debug/error messages) 87 name: Tag name (part after 'Series-') 88 value: Tag value (part after 'Series-xxx: ') 89 """ 90 if name == 'notes': 91 self.in_section = name 92 self.skip_blank = False 93 if self.is_log: 94 self.series.AddTag(self.commit, line, name, value) 95 96 def AddToCommit(self, line, name, value): 97 """Add a new Commit-xxx tag. 98 99 When a Commit-xxx tag is detected, we come here to record it. 100 101 Args: 102 line: Source line containing tag (useful for debug/error messages) 103 name: Tag name (part after 'Commit-') 104 value: Tag value (part after 'Commit-xxx: ') 105 """ 106 if name == 'notes': 107 self.in_section = 'commit-' + name 108 self.skip_blank = False 109 110 def CloseCommit(self): 111 """Save the current commit into our commit list, and reset our state""" 112 if self.commit and self.is_log: 113 self.series.AddCommit(self.commit) 114 self.commit = None 115 116 def ProcessLine(self, line): 117 """Process a single line of a patch file or commit log 118 119 This process a line and returns a list of lines to output. The list 120 may be empty or may contain multiple output lines. 121 122 This is where all the complicated logic is located. The class's 123 state is used to move between different states and detect things 124 properly. 125 126 We can be in one of two modes: 127 self.is_log == True: This is 'git log' mode, where most output is 128 indented by 4 characters and we are scanning for tags 129 130 self.is_log == False: This is 'patch' mode, where we already have 131 all the tags, and are processing patches to remove junk we 132 don't want, and add things we think are required. 133 134 Args: 135 line: text line to process 136 137 Returns: 138 list of output lines, or [] if nothing should be output 139 """ 140 # Initially we have no output. Prepare the input line string 141 out = [] 142 line = line.rstrip('\n') 143 144 commit_match = re_commit.match(line) if self.is_log else None 145 146 if self.is_log: 147 if line[:4] == ' ': 148 line = line[4:] 149 150 # Handle state transition and skipping blank lines 151 series_tag_match = re_series_tag.match(line) 152 commit_tag_match = re_commit_tag.match(line) 153 cover_cc_match = re_cover_cc.match(line) 154 signoff_match = re_signoff.match(line) 155 tag_match = None 156 if self.state == STATE_PATCH_HEADER: 157 tag_match = re_tag.match(line) 158 is_blank = not line.strip() 159 if is_blank: 160 if (self.state == STATE_MSG_HEADER 161 or self.state == STATE_PATCH_SUBJECT): 162 self.state += 1 163 164 # We don't have a subject in the text stream of patch files 165 # It has its own line with a Subject: tag 166 if not self.is_log and self.state == STATE_PATCH_SUBJECT: 167 self.state += 1 168 elif commit_match: 169 self.state = STATE_MSG_HEADER 170 171 # If we are in a section, keep collecting lines until we see END 172 if self.in_section: 173 if line == 'END': 174 if self.in_section == 'cover': 175 self.series.cover = self.section 176 elif self.in_section == 'notes': 177 if self.is_log: 178 self.series.notes += self.section 179 elif self.in_section == 'commit-notes': 180 if self.is_log: 181 self.commit.notes += self.section 182 else: 183 self.warn.append("Unknown section '%s'" % self.in_section) 184 self.in_section = None 185 self.skip_blank = True 186 self.section = [] 187 else: 188 self.section.append(line) 189 190 # Detect the commit subject 191 elif not is_blank and self.state == STATE_PATCH_SUBJECT: 192 self.commit.subject = line 193 194 # Detect the tags we want to remove, and skip blank lines 195 elif re_remove.match(line) and not commit_tag_match: 196 self.skip_blank = True 197 198 # TEST= should be the last thing in the commit, so remove 199 # everything after it 200 if line.startswith('TEST='): 201 self.found_test = True 202 elif self.skip_blank and is_blank: 203 self.skip_blank = False 204 205 # Detect the start of a cover letter section 206 elif re_cover.match(line): 207 self.in_section = 'cover' 208 self.skip_blank = False 209 210 elif cover_cc_match: 211 value = cover_cc_match.group(1) 212 self.AddToSeries(line, 'cover-cc', value) 213 214 # If we are in a change list, key collected lines until a blank one 215 elif self.in_change: 216 if is_blank: 217 # Blank line ends this change list 218 self.in_change = 0 219 elif line == '---': 220 self.in_change = 0 221 out = self.ProcessLine(line) 222 else: 223 if self.is_log: 224 self.series.AddChange(self.in_change, self.commit, line) 225 self.skip_blank = False 226 227 # Detect Series-xxx tags 228 elif series_tag_match: 229 name = series_tag_match.group(1) 230 value = series_tag_match.group(2) 231 if name == 'changes': 232 # value is the version number: e.g. 1, or 2 233 try: 234 value = int(value) 235 except ValueError as str: 236 raise ValueError("%s: Cannot decode version info '%s'" % 237 (self.commit.hash, line)) 238 self.in_change = int(value) 239 else: 240 self.AddToSeries(line, name, value) 241 self.skip_blank = True 242 243 # Detect Commit-xxx tags 244 elif commit_tag_match: 245 name = commit_tag_match.group(1) 246 value = commit_tag_match.group(2) 247 if name == 'notes': 248 self.AddToCommit(line, name, value) 249 self.skip_blank = True 250 251 # Detect the start of a new commit 252 elif commit_match: 253 self.CloseCommit() 254 self.commit = commit.Commit(commit_match.group(1)) 255 256 # Detect tags in the commit message 257 elif tag_match: 258 # Remove Tested-by self, since few will take much notice 259 if (tag_match.group(1) == 'Tested-by' and 260 tag_match.group(2).find(os.getenv('USER') + '@') != -1): 261 self.warn.append("Ignoring %s" % line) 262 elif tag_match.group(1) == 'Patch-cc': 263 self.commit.AddCc(tag_match.group(2).split(',')) 264 else: 265 out = [line] 266 267 # Suppress duplicate signoffs 268 elif signoff_match: 269 if (self.is_log or not self.commit or 270 self.commit.CheckDuplicateSignoff(signoff_match.group(1))): 271 out = [line] 272 273 # Well that means this is an ordinary line 274 else: 275 pos = 1 276 # Look for ugly ASCII characters 277 for ch in line: 278 # TODO: Would be nicer to report source filename and line 279 if ord(ch) > 0x80: 280 self.warn.append("Line %d/%d ('%s') has funny ascii char" % 281 (self.linenum, pos, line)) 282 pos += 1 283 284 # Look for space before tab 285 m = re_space_before_tab.match(line) 286 if m: 287 self.warn.append('Line %d/%d has space before tab' % 288 (self.linenum, m.start())) 289 290 # OK, we have a valid non-blank line 291 out = [line] 292 self.linenum += 1 293 self.skip_blank = False 294 if self.state == STATE_DIFFS: 295 pass 296 297 # If this is the start of the diffs section, emit our tags and 298 # change log 299 elif line == '---': 300 self.state = STATE_DIFFS 301 302 # Output the tags (signeoff first), then change list 303 out = [] 304 log = self.series.MakeChangeLog(self.commit) 305 out += [line] 306 if self.commit: 307 out += self.commit.notes 308 out += [''] + log 309 elif self.found_test: 310 if not re_allowed_after_test.match(line): 311 self.lines_after_test += 1 312 313 return out 314 315 def Finalize(self): 316 """Close out processing of this patch stream""" 317 self.CloseCommit() 318 if self.lines_after_test: 319 self.warn.append('Found %d lines after TEST=' % 320 self.lines_after_test) 321 322 def ProcessStream(self, infd, outfd): 323 """Copy a stream from infd to outfd, filtering out unwanting things. 324 325 This is used to process patch files one at a time. 326 327 Args: 328 infd: Input stream file object 329 outfd: Output stream file object 330 """ 331 # Extract the filename from each diff, for nice warnings 332 fname = None 333 last_fname = None 334 re_fname = re.compile('diff --git a/(.*) b/.*') 335 while True: 336 line = infd.readline() 337 if not line: 338 break 339 out = self.ProcessLine(line) 340 341 # Try to detect blank lines at EOF 342 for line in out: 343 match = re_fname.match(line) 344 if match: 345 last_fname = fname 346 fname = match.group(1) 347 if line == '+': 348 self.blank_count += 1 349 else: 350 if self.blank_count and (line == '-- ' or match): 351 self.warn.append("Found possible blank line(s) at " 352 "end of file '%s'" % last_fname) 353 outfd.write('+\n' * self.blank_count) 354 outfd.write(line + '\n') 355 self.blank_count = 0 356 self.Finalize() 357 358 359def GetMetaDataForList(commit_range, git_dir=None, count=None, 360 series = None, allow_overwrite=False): 361 """Reads out patch series metadata from the commits 362 363 This does a 'git log' on the relevant commits and pulls out the tags we 364 are interested in. 365 366 Args: 367 commit_range: Range of commits to count (e.g. 'HEAD..base') 368 git_dir: Path to git repositiory (None to use default) 369 count: Number of commits to list, or None for no limit 370 series: Series object to add information into. By default a new series 371 is started. 372 allow_overwrite: Allow tags to overwrite an existing tag 373 Returns: 374 A Series object containing information about the commits. 375 """ 376 if not series: 377 series = Series() 378 series.allow_overwrite = allow_overwrite 379 params = gitutil.LogCmd(commit_range, reverse=True, count=count, 380 git_dir=git_dir) 381 stdout = command.RunPipe([params], capture=True).stdout 382 ps = PatchStream(series, is_log=True) 383 for line in stdout.splitlines(): 384 ps.ProcessLine(line) 385 ps.Finalize() 386 return series 387 388def GetMetaData(start, count): 389 """Reads out patch series metadata from the commits 390 391 This does a 'git log' on the relevant commits and pulls out the tags we 392 are interested in. 393 394 Args: 395 start: Commit to start from: 0=HEAD, 1=next one, etc. 396 count: Number of commits to list 397 """ 398 return GetMetaDataForList('HEAD~%d' % start, None, count) 399 400def FixPatch(backup_dir, fname, series, commit): 401 """Fix up a patch file, by adding/removing as required. 402 403 We remove our tags from the patch file, insert changes lists, etc. 404 The patch file is processed in place, and overwritten. 405 406 A backup file is put into backup_dir (if not None). 407 408 Args: 409 fname: Filename to patch file to process 410 series: Series information about this patch set 411 commit: Commit object for this patch file 412 Return: 413 A list of errors, or [] if all ok. 414 """ 415 handle, tmpname = tempfile.mkstemp() 416 outfd = os.fdopen(handle, 'w') 417 infd = open(fname, 'r') 418 ps = PatchStream(series) 419 ps.commit = commit 420 ps.ProcessStream(infd, outfd) 421 infd.close() 422 outfd.close() 423 424 # Create a backup file if required 425 if backup_dir: 426 shutil.copy(fname, os.path.join(backup_dir, os.path.basename(fname))) 427 shutil.move(tmpname, fname) 428 return ps.warn 429 430def FixPatches(series, fnames): 431 """Fix up a list of patches identified by filenames 432 433 The patch files are processed in place, and overwritten. 434 435 Args: 436 series: The series object 437 fnames: List of patch files to process 438 """ 439 # Current workflow creates patches, so we shouldn't need a backup 440 backup_dir = None #tempfile.mkdtemp('clean-patch') 441 count = 0 442 for fname in fnames: 443 commit = series.commits[count] 444 commit.patch = fname 445 result = FixPatch(backup_dir, fname, series, commit) 446 if result: 447 print '%d warnings for %s:' % (len(result), fname) 448 for warn in result: 449 print '\t', warn 450 print 451 count += 1 452 print 'Cleaned %d patches' % count 453 return series 454 455def InsertCoverLetter(fname, series, count): 456 """Inserts a cover letter with the required info into patch 0 457 458 Args: 459 fname: Input / output filename of the cover letter file 460 series: Series object 461 count: Number of patches in the series 462 """ 463 fd = open(fname, 'r') 464 lines = fd.readlines() 465 fd.close() 466 467 fd = open(fname, 'w') 468 text = series.cover 469 prefix = series.GetPatchPrefix() 470 for line in lines: 471 if line.startswith('Subject:'): 472 # if more than 10 or 100 patches, it should say 00/xx, 000/xxx, etc 473 zero_repeat = int(math.log10(count)) + 1 474 zero = '0' * zero_repeat 475 line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0]) 476 477 # Insert our cover letter 478 elif line.startswith('*** BLURB HERE ***'): 479 # First the blurb test 480 line = '\n'.join(text[1:]) + '\n' 481 if series.get('notes'): 482 line += '\n'.join(series.notes) + '\n' 483 484 # Now the change list 485 out = series.MakeChangeLog(None) 486 line += '\n' + '\n'.join(out) 487 fd.write(line) 488 fd.close() 489