# Copyright (c) 2011 The Chromium OS Authors.
#
# SPDX-License-Identifier:	GPL-2.0+
#

import os
import re
import shutil
import tempfile

import command
import commit
import gitutil
from series import Series

# Tags that we detect and remove
re_remove = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
    r'|Reviewed-on:|Commit-\w*:')

# Lines which are allowed after a TEST= line
re_allowed_after_test = re.compile(r'^Signed-off-by:')

# Signoffs
re_signoff = re.compile(r'^Signed-off-by:')

# The start of the cover letter
re_cover = re.compile(r'^Cover-letter:')

# A cover letter Cc
re_cover_cc = re.compile(r'^Cover-letter-cc: *(.*)')

# Patch series tag
re_series_tag = re.compile(r'^Series-([a-z-]*): *(.*)')

# Commit series tag
re_commit_tag = re.compile(r'^Commit-([a-z-]*): *(.*)')

# Commit tags that we want to collect and keep
re_tag = re.compile(r'^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)')

# The start of a new commit in the git log
re_commit = re.compile(r'^commit ([0-9a-f]*)$')

# We detect these since checkpatch doesn't always do it
re_space_before_tab = re.compile(r'^[+].* \t')

# States we can be in - can we use range() and still have comments?
STATE_MSG_HEADER = 0        # Still in the message header
STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
STATE_PATCH_HEADER = 2      # In patch header (after the subject)
STATE_DIFFS = 3             # In the diff part (past --- line)


class PatchStream:
    """Class for detecting/injecting tags in a patch or series of patches

    We support processing the output of 'git log' to read out the tags we
    are interested in. We can also process a patch file in order to remove
    unwanted tags or inject additional ones. These correspond to the two
    phases of processing.
    """
    def __init__(self, series, name=None, is_log=False):
        """Set up a new patch stream

        Args:
            series: Series object which collects the information found
            name: Not used by this class; kept so existing callers still work
            is_log: True if the input is 'git log' output (message lines are
                indented), False if it is a patch file
        """
        self.skip_blank = False          # True to skip a single blank line
        self.found_test = False          # Found a TEST= line
        self.lines_after_test = 0        # Number of lines found after TEST=
        self.warn = []                   # List of warnings we have collected
        self.linenum = 1                 # Output line number we are up to
        self.in_section = None           # Name of start...END section we are in
        self.notes = []                  # Series notes
        self.section = []                # The current section...END section
        self.series = series             # Info about the patch series
        self.is_log = is_log             # True if indent like git log
        self.in_change = 0               # Non-zero if we are in a change list
        self.blank_count = 0             # Number of blank lines stored up
        self.state = STATE_MSG_HEADER    # What state are we in?
        self.tags = []                   # Tags collected, like Tested-by...
        self.signoff = []                # Contents of signoff line
        self.commit = None               # Current commit

    def AddToSeries(self, line, name, value):
        """Add a new Series-xxx tag.

        When a Series-xxx tag is detected, we come here to record it, if we
        are scanning a 'git log'.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Series-')
            value: Tag value (part after 'Series-xxx: ')
        """
        if name == 'notes':
            # The lines which follow, up to END, are the notes themselves
            self.in_section = name
            self.skip_blank = False
        if self.is_log:
            self.series.AddTag(self.commit, line, name, value)

    def AddToCommit(self, line, name, value):
        """Add a new Commit-xxx tag.

        When a Commit-xxx tag is detected, we come here to record it.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Commit-')
            value: Tag value (part after 'Commit-xxx: ')
        """
        if name == 'notes':
            # The lines which follow, up to END, are the notes themselves
            self.in_section = 'commit-' + name
            self.skip_blank = False

    def CloseCommit(self):
        """Save the current commit into our commit list, and reset our state"""
        if self.commit and self.is_log:
            self.series.AddCommit(self.commit)
            self.commit = None

    def FormatTags(self, tags):
        """Sort a list of tags, expanding each Cc: tag into its addresses

        Args:
            tags: List of tag lines, e.g. 'Acked-by: Fred <f@b.com>'

        Returns:
            New list of tag lines in sorted order. A 'Cc:' tag holding a
            comma-separated list is replaced by whatever
            gitutil.BuildEmailList() returns for that list.
        """
        out_list = []
        for tag in sorted(tags):
            if tag.startswith('Cc:'):
                # Skip over 'Cc: ' to get at the address list
                tag_list = tag[4:].split(',')
                out_list += gitutil.BuildEmailList(tag_list, 'Cc:')
            else:
                out_list.append(tag)
        return out_list

    def ProcessLine(self, line):
        """Process a single line of a patch file or commit log

        This process a line and returns a list of lines to output. The list
        may be empty or may contain multiple output lines.

        This is where all the complicated logic is located. The class's
        state is used to move between different states and detect things
        properly.

        We can be in one of two modes:
            self.is_log == True: This is 'git log' mode, where most output is
                indented by 4 characters and we are scanning for tags

            self.is_log == False: This is 'patch' mode, where we already have
                all the tags, and are processing patches to remove junk we
                don't want, and add things we think are required.

        Args:
            line: text line to process

        Returns:
            list of output lines, or [] if nothing should be output

        Raises:
            ValueError: a 'Series-changes:' tag has a version which is not
                an integer
        """
        # Initially we have no output. Prepare the input line string
        out = []
        line = line.rstrip('\n')
        if self.is_log:
            # 'git log' indents the commit message by four spaces
            if line[:4] == '    ':
                line = line[4:]

        # Handle state transition and skipping blank lines
        series_tag_match = re_series_tag.match(line)
        commit_tag_match = re_commit_tag.match(line)
        commit_match = re_commit.match(line) if self.is_log else None
        cover_cc_match = re_cover_cc.match(line)
        tag_match = None
        if self.state == STATE_PATCH_HEADER:
            tag_match = re_tag.match(line)
        is_blank = not line.strip()
        if is_blank:
            if (self.state == STATE_MSG_HEADER
                    or self.state == STATE_PATCH_SUBJECT):
                self.state += 1

            # We don't have a subject in the text stream of patch files
            # It has its own line with a Subject: tag
            if not self.is_log and self.state == STATE_PATCH_SUBJECT:
                self.state += 1
        elif commit_match:
            self.state = STATE_MSG_HEADER

        # If we are in a section, keep collecting lines until we see END
        if self.in_section:
            if line == 'END':
                if self.in_section == 'cover':
                    self.series.cover = self.section
                elif self.in_section == 'notes':
                    if self.is_log:
                        self.series.notes += self.section
                elif self.in_section == 'commit-notes':
                    if self.is_log:
                        self.commit.notes += self.section
                else:
                    self.warn.append("Unknown section '%s'" % self.in_section)
                self.in_section = None
                self.skip_blank = True
                self.section = []
            else:
                self.section.append(line)

        # Detect the commit subject
        elif not is_blank and self.state == STATE_PATCH_SUBJECT:
            self.commit.subject = line

        # Detect the tags we want to remove, and skip blank lines
        elif re_remove.match(line) and not commit_tag_match:
            self.skip_blank = True

            # TEST= should be the last thing in the commit, so remove
            # everything after it
            if line.startswith('TEST='):
                self.found_test = True
        elif self.skip_blank and is_blank:
            self.skip_blank = False

        # Detect the start of a cover letter section
        elif re_cover.match(line):
            self.in_section = 'cover'
            self.skip_blank = False

        elif cover_cc_match:
            value = cover_cc_match.group(1)
            self.AddToSeries(line, 'cover-cc', value)

        # If we are in a change list, keep collecting lines until a blank one
        elif self.in_change:
            if is_blank:
                # Blank line ends this change list
                self.in_change = 0
            elif line == '---' or re_signoff.match(line):
                # These also end the change list, but are not part of it, so
                # process the line again now that we have left the list
                self.in_change = 0
                out = self.ProcessLine(line)
            else:
                if self.is_log:
                    self.series.AddChange(self.in_change, self.commit, line)
            self.skip_blank = False

        # Detect Series-xxx tags
        elif series_tag_match:
            name = series_tag_match.group(1)
            value = series_tag_match.group(2)
            if name == 'changes':
                # value is the version number: e.g. 1, or 2
                try:
                    self.in_change = int(value)
                except ValueError:
                    raise ValueError("%s: Cannot decode version info '%s'" %
                        (self.commit.hash, line))
            else:
                self.AddToSeries(line, name, value)
                self.skip_blank = True

        # Detect Commit-xxx tags
        elif commit_tag_match:
            name = commit_tag_match.group(1)
            value = commit_tag_match.group(2)
            if name == 'notes':
                self.AddToCommit(line, name, value)
                self.skip_blank = True

        # Detect the start of a new commit
        elif commit_match:
            self.CloseCommit()
            # TODO: We should store the whole hash, and just display a subset
            self.commit = commit.Commit(commit_match.group(1)[:8])

        # Detect tags in the commit message
        elif tag_match:
            tag_name = tag_match.group(1)
            tag_value = tag_match.group(2)

            # USER may not be set (e.g. under cron), in which case we cannot
            # tell whether the tag refers to the person running the tool
            user = os.getenv('USER')

            # Remove Tested-by self, since few will take much notice
            if tag_name == 'Tested-by' and user and (user + '@') in tag_value:
                self.warn.append("Ignoring %s" % line)
            elif tag_name == 'Patch-cc':
                self.commit.AddCc(tag_value.split(','))
            else:
                self.tags.append(line)

        # Well that means this is an ordinary line
        else:
            pos = 1
            # Look for ugly ASCII characters
            for ch in line:
                # TODO: Would be nicer to report source filename and line
                # NOTE(review): this lets 0x80 itself through; '> 0x7f' was
                # probably intended - confirm before changing
                if ord(ch) > 0x80:
                    self.warn.append("Line %d/%d ('%s') has funny ascii char" %
                        (self.linenum, pos, line))
                pos += 1

            # Look for space before tab
            m = re_space_before_tab.match(line)
            if m:
                # NOTE(review): the pattern is anchored at the start of the
                # line, so m.start() is always 0 rather than the column of
                # the offending space
                self.warn.append('Line %d/%d has space before tab' %
                    (self.linenum, m.start()))

            # OK, we have a valid non-blank line
            out = [line]
            self.linenum += 1
            self.skip_blank = False
            if self.state == STATE_DIFFS:
                pass

            # If this is the start of the diffs section, emit our tags and
            # change log
            elif line == '---':
                self.state = STATE_DIFFS

                # Output the tags (signoff first), then change list
                out = []
                log = self.series.MakeChangeLog(self.commit)
                out += self.FormatTags(self.tags)
                out += [line] + self.commit.notes + [''] + log
            elif self.found_test:
                if not re_allowed_after_test.match(line):
                    self.lines_after_test += 1

        return out

    def Finalize(self):
        """Close out processing of this patch stream"""
        self.CloseCommit()
        if self.lines_after_test:
            self.warn.append('Found %d lines after TEST=' %
                    self.lines_after_test)

    def ProcessStream(self, infd, outfd):
        """Copy a stream from infd to outfd, filtering out unwanted things.

        This is used to process patch files one at a time.

        Args:
            infd: Input stream file object
            outfd: Output stream file object
        """
        # Extract the filename from each diff, for nice warnings
        fname = None
        last_fname = None
        re_fname = re.compile('diff --git a/(.*) b/.*')
        while True:
            line = infd.readline()
            if not line:
                break
            out = self.ProcessLine(line)

            # Try to detect blank lines at EOF
            for out_line in out:
                match = re_fname.match(out_line)
                if match:
                    last_fname = fname
                    fname = match.group(1)
                if out_line == '+':
                    # Hold back added blank lines until we know what follows
                    self.blank_count += 1
                else:
                    if self.blank_count and (out_line == '-- ' or match):
                        # The blank lines end the diff of the file before a
                        # new 'diff --git' line, or of the current file when
                        # we have reached the '-- ' signature separator
                        ended_fname = last_fname if match else fname
                        self.warn.append("Found possible blank line(s) at "
                                "end of file '%s'" % ended_fname)
                    outfd.write('+\n' * self.blank_count)
                    outfd.write(out_line + '\n')
                    self.blank_count = 0
        self.Finalize()


def GetMetaDataForList(commit_range, git_dir=None, count=None,
                       series=None):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and pulls out the tags we
    are interested in.

    Args:
        commit_range: Range of commits to count (e.g. 'HEAD..base')
        git_dir: Path to git repository (None to use default)
        count: Number of commits to list, or None for no limit
        series: Series object to add information into. By default a new series
            is started.
    Returns:
        A Series object containing information about the commits.
    """
    # A default of Series() in the signature would be created only once and
    # then shared by every call, so create the new series here instead
    if series is None:
        series = Series()
    params = ['git', 'log', '--no-color', '--reverse', '--no-decorate',
              commit_range]
    if count is not None:
        params[2:2] = ['-n%d' % count]
    if git_dir:
        params[1:1] = ['--git-dir', git_dir]
    pipe = [params]
    stdout = command.RunPipe(pipe, capture=True).stdout
    ps = PatchStream(series, is_log=True)
    for line in stdout.splitlines():
        ps.ProcessLine(line)
    ps.Finalize()
    return series


def GetMetaData(start, count):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and pulls out the tags we
    are interested in.

    Args:
        start: Commit to start from: 0=HEAD, 1=next one, etc.
        count: Number of commits to list
    Returns:
        A new Series object containing information about the commits.
    """
    return GetMetaDataForList('HEAD~%d' % start, None, count)


def FixPatch(backup_dir, fname, series, commit):
    """Fix up a patch file, by adding/removing as required.

    We remove our tags from the patch file, insert changes lists, etc.
    The patch file is processed in place, and overwritten.

    A backup file is put into backup_dir (if not None).

    Args:
        backup_dir: Directory to copy the original patch file into, or None
            for no backup
        fname: Filename to patch file to process
        series: Series information about this patch set
        commit: Commit object for this patch file
    Return:
        A list of errors, or [] if all ok.
    """
    handle, tmpname = tempfile.mkstemp()
    try:
        # 'with' closes both files even if processing raises an exception
        with os.fdopen(handle, 'w') as outfd:
            with open(fname, 'r') as infd:
                ps = PatchStream(series)
                ps.commit = commit
                ps.ProcessStream(infd, outfd)

        # Create a backup file if required
        if backup_dir:
            shutil.copy(fname,
                        os.path.join(backup_dir, os.path.basename(fname)))
        shutil.move(tmpname, fname)
    finally:
        # After a successful move the temporary file is gone; it only
        # remains (and needs removing) if something failed above
        if os.path.exists(tmpname):
            os.remove(tmpname)
    return ps.warn


def FixPatches(series, fnames):
    """Fix up a list of patches identified by filenames

    The patch files are processed in place, and overwritten.

    Args:
        series: The series object
        fnames: List of patch files to process
    Returns:
        The series object which was passed in
    """
    # Current workflow creates patches, so we shouldn't need a backup
    backup_dir = None  #tempfile.mkdtemp('clean-patch')
    count = 0
    for fname in fnames:
        # Patch files are expected in the same order as the series' commits
        cmt = series.commits[count]
        cmt.patch = fname
        result = FixPatch(backup_dir, fname, series, cmt)
        if result:
            # Single-argument print() calls produce the same output under
            # Python 2 and Python 3
            print('%d warnings for %s:' % (len(result), fname))
            for warn in result:
                print('\t%s' % warn)
            print('')
        count += 1
    print('Cleaned %d patches' % count)
    return series


def InsertCoverLetter(fname, series, count):
    """Inserts a cover letter with the required info into patch 0

    Args:
        fname: Input / output filename of the cover letter file
        series: Series object
        count: Number of patches in the series
    """
    with open(fname, 'r') as fd:
        lines = fd.readlines()

    # Collect everything we need before opening the file for writing, since
    # that truncates it: a failure here must not lose the cover letter
    text = series.cover
    prefix = series.GetPatchPrefix()

    # Pad the cover-letter number to the width of the patch count, so that
    # the subject reads 00/12 rather than 0/12
    zeros = '0' * len(str(count))

    with open(fname, 'w') as fd:
        for line in lines:
            if line.startswith('Subject:'):
                line = 'Subject: [%s %s/%d] %s\n' % (prefix, zeros, count,
                                                     text[0])

            # Insert our cover letter
            elif line.startswith('*** BLURB HERE ***'):
                # First the blurb text
                line = '\n'.join(text[1:]) + '\n'
                if series.get('notes'):
                    line += '\n'.join(series.notes) + '\n'

                # Now the change list
                out = series.MakeChangeLog(None)
                line += '\n' + '\n'.join(out)
            fd.write(line)