# Copyright (c) 2011 The Chromium OS Authors.
#
# SPDX-License-Identifier: GPL-2.0+
#

import os
import re
import shutil
import tempfile

import command
import commit
import gitutil
from series import Series

# Tags that we detect and remove
re_remove = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
                       r'|Reviewed-on:|Commit-\w*:')

# Lines which are allowed after a TEST= line
re_allowed_after_test = re.compile('^Signed-off-by:')

# Signoffs
re_signoff = re.compile('^Signed-off-by: *(.*)')

# The start of the cover letter
re_cover = re.compile('^Cover-letter:')

# A cover letter Cc
re_cover_cc = re.compile('^Cover-letter-cc: *(.*)')

# Patch series tag
re_series_tag = re.compile('^Series-([a-z-]*): *(.*)')

# Commit series tag
re_commit_tag = re.compile('^Commit-([a-z-]*): *(.*)')

# Commit tags that we want to collect and keep
re_tag = re.compile('^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)')

# The start of a new commit in the git log
re_commit = re.compile('^commit ([0-9a-f]*)$')

# We detect these since checkpatch doesn't always do it
re_space_before_tab = re.compile('^[+].* \t')

# States we can be in - can we use range() and still have comments?
STATE_MSG_HEADER = 0        # Still in the message header
STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
STATE_PATCH_HEADER = 2      # In patch header (after the subject)
STATE_DIFFS = 3             # In the diff part (past --- line)


class PatchStream:
    """Class for detecting/injecting tags in a patch or series of patches

    We support processing the output of 'git log' to read out the tags we
    are interested in. We can also process a patch file in order to remove
    unwanted tags or inject additional ones. These correspond to the two
    phases of processing.
    """
    def __init__(self, series, name=None, is_log=False):
        """Set up a new stream.

        Args:
            series: Series object which collects tags, commits and changes
            name: Not used by this class; kept so existing callers still work
            is_log: True to process 'git log' output, False to process a
                patch file
        """
        self.skip_blank = False          # True to skip a single blank line
        self.found_test = False          # Found a TEST= line
        self.lines_after_test = 0        # Number of lines found after TEST=
        self.warn = []                   # List of warnings we have collected
        self.linenum = 1                 # Output line number we are up to
        self.in_section = None           # Name of start...END section we are in
        self.notes = []                  # Series notes
        self.section = []                # The current section...END section
        self.series = series             # Info about the patch series
        self.is_log = is_log             # True if indent like git log
        self.in_change = 0               # Non-zero if we are in a change list
        self.blank_count = 0             # Number of blank lines stored up
        self.state = STATE_MSG_HEADER    # What state are we in?
        self.signoff = []                # Contents of signoff line
        self.commit = None               # Current commit

    def AddToSeries(self, line, name, value):
        """Add a new Series-xxx tag.

        When a Series-xxx tag is detected, we come here to record it, if we
        are scanning a 'git log'.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Series-')
            value: Tag value (part after 'Series-xxx: ')
        """
        if name == 'notes':
            # The following lines, up to END, are collected as a section
            self.in_section = name
            self.skip_blank = False
        if self.is_log:
            self.series.AddTag(self.commit, line, name, value)

    def AddToCommit(self, line, name, value):
        """Add a new Commit-xxx tag.

        When a Commit-xxx tag is detected, we come here to record it.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Commit-')
            value: Tag value (part after 'Commit-xxx: ')
        """
        if name == 'notes':
            # The following lines, up to END, are collected as a section
            self.in_section = 'commit-' + name
            self.skip_blank = False

    def CloseCommit(self):
        """Save the current commit into our commit list, and reset our state"""
        if self.commit and self.is_log:
            self.series.AddCommit(self.commit)
            self.commit = None

    def ProcessLine(self, line):
        """Process a single line of a patch file or commit log

        This process a line and returns a list of lines to output. The list
        may be empty or may contain multiple output lines.

        This is where all the complicated logic is located. The class's
        state is used to move between different states and detect things
        properly.

        We can be in one of two modes:
            self.is_log == True: This is 'git log' mode, where most output is
                indented by 4 characters and we are scanning for tags

            self.is_log == False: This is 'patch' mode, where we already have
                all the tags, and are processing patches to remove junk we
                don't want, and add things we think are required.

        Args:
            line: text line to process

        Returns:
            list of output lines, or [] if nothing should be output

        Raises:
            ValueError: if a 'Series-changes:' tag does not carry an integer
                version number
        """
        # Initially we have no output. Prepare the input line string
        out = []
        line = line.rstrip('\n')
        if self.is_log:
            # Drop the 4-character indent which git log adds to messages
            if line.startswith('    '):
                line = line[4:]

        # Handle state transition and skipping blank lines
        series_tag_match = re_series_tag.match(line)
        commit_tag_match = re_commit_tag.match(line)
        commit_match = re_commit.match(line) if self.is_log else None
        cover_cc_match = re_cover_cc.match(line)
        signoff_match = re_signoff.match(line)
        tag_match = None
        if self.state == STATE_PATCH_HEADER:
            tag_match = re_tag.match(line)
        is_blank = not line.strip()
        if is_blank:
            if self.state in (STATE_MSG_HEADER, STATE_PATCH_SUBJECT):
                self.state += 1

            # We don't have a subject in the text stream of patch files
            # It has its own line with a Subject: tag
            if not self.is_log and self.state == STATE_PATCH_SUBJECT:
                self.state += 1
        elif commit_match:
            self.state = STATE_MSG_HEADER

        # If we are in a section, keep collecting lines until we see END
        if self.in_section:
            if line == 'END':
                if self.in_section == 'cover':
                    self.series.cover = self.section
                elif self.in_section == 'notes':
                    if self.is_log:
                        self.series.notes += self.section
                elif self.in_section == 'commit-notes':
                    if self.is_log:
                        self.commit.notes += self.section
                else:
                    self.warn.append("Unknown section '%s'" % self.in_section)
                self.in_section = None
                self.skip_blank = True
                self.section = []
            else:
                self.section.append(line)

        # Detect the commit subject
        elif not is_blank and self.state == STATE_PATCH_SUBJECT:
            self.commit.subject = line

        # Detect the tags we want to remove, and skip blank lines
        elif re_remove.match(line) and not commit_tag_match:
            self.skip_blank = True

            # TEST= should be the last thing in the commit, so remove
            # everything after it
            if line.startswith('TEST='):
                self.found_test = True
        elif self.skip_blank and is_blank:
            self.skip_blank = False

        # Detect the start of a cover letter section
        elif re_cover.match(line):
            self.in_section = 'cover'
            self.skip_blank = False

        elif cover_cc_match:
            value = cover_cc_match.group(1)
            self.AddToSeries(line, 'cover-cc', value)

        # If we are in a change list, keep collecting lines until a blank one
        elif self.in_change:
            if is_blank:
                # Blank line ends this change list
                self.in_change = 0
            elif line == '---':
                # The diff marker also ends the list; process it as normal
                self.in_change = 0
                out = self.ProcessLine(line)
            else:
                if self.is_log:
                    self.series.AddChange(self.in_change, self.commit, line)
            self.skip_blank = False

        # Detect Series-xxx tags
        elif series_tag_match:
            name = series_tag_match.group(1)
            value = series_tag_match.group(2)
            if name == 'changes':
                # value is the version number: e.g. 1, or 2
                try:
                    self.in_change = int(value)
                except ValueError:
                    raise ValueError("%s: Cannot decode version info '%s'" %
                                     (self.commit.hash, line))
            else:
                self.AddToSeries(line, name, value)
                self.skip_blank = True

        # Detect Commit-xxx tags
        elif commit_tag_match:
            name = commit_tag_match.group(1)
            value = commit_tag_match.group(2)
            if name == 'notes':
                self.AddToCommit(line, name, value)
                self.skip_blank = True

        # Detect the start of a new commit
        elif commit_match:
            self.CloseCommit()
            self.commit = commit.Commit(commit_match.group(1))

        # Detect tags in the commit message
        elif tag_match:
            # Remove Tested-by self, since few will take much notice. If USER
            # is unset or empty we cannot tell who 'self' is, so keep the tag
            user = os.getenv('USER')
            if (tag_match.group(1) == 'Tested-by' and user and
                    (user + '@') in tag_match.group(2)):
                self.warn.append("Ignoring %s" % line)
            elif tag_match.group(1) == 'Patch-cc':
                self.commit.AddCc(tag_match.group(2).split(','))
            else:
                out = [line]

        # Suppress duplicate signoffs
        elif signoff_match:
            if (self.is_log or not self.commit or
                    self.commit.CheckDuplicateSignoff(signoff_match.group(1))):
                out = [line]

        # Well that means this is an ordinary line
        else:
            # Look for characters outside the 7-bit ASCII range
            for pos, ch in enumerate(line, 1):
                # TODO: Would be nicer to report source filename and line
                if ord(ch) >= 0x80:
                    self.warn.append("Line %d/%d ('%s') has funny ascii char" %
                                     (self.linenum, pos, line))

            # Look for space before tab
            if re_space_before_tab.match(line):
                # The regex is anchored at the start of the line, so locate
                # the offending space ourselves (1-based, as for 'pos' above)
                self.warn.append('Line %d/%d has space before tab' %
                                 (self.linenum, line.find(' \t') + 1))

            # OK, we have a valid non-blank line
            out = [line]
            self.linenum += 1
            self.skip_blank = False
            if self.state == STATE_DIFFS:
                pass

            # If this is the start of the diffs section, emit our tags and
            # change log
            elif line == '---':
                self.state = STATE_DIFFS

                # Output the tags (signoff first), then change list
                log = self.series.MakeChangeLog(self.commit)
                out = [line]
                if self.commit:
                    out += self.commit.notes
                out += [''] + log
            elif self.found_test:
                if not re_allowed_after_test.match(line):
                    self.lines_after_test += 1

        return out

    def Finalize(self):
        """Close out processing of this patch stream"""
        self.CloseCommit()
        if self.lines_after_test:
            self.warn.append('Found %d lines after TEST=' %
                             self.lines_after_test)

    def ProcessStream(self, infd, outfd):
        """Copy a stream from infd to outfd, filtering out unwanted things.

        This is used to process patch files one at a time.

        Args:
            infd: Input stream file object
            outfd: Output stream file object
        """
        # Extract the filename from each diff, for nice warnings
        fname = None
        last_fname = None
        re_fname = re.compile('diff --git a/(.*) b/.*')
        for line in iter(infd.readline, ''):
            # Try to detect blank lines at EOF. Added blank lines ('+') are
            # held back and counted until we see what follows them
            for out_line in self.ProcessLine(line):
                match = re_fname.match(out_line)
                if match:
                    last_fname = fname
                    fname = match.group(1)
                if out_line == '+':
                    self.blank_count += 1
                    continue
                if self.blank_count and (out_line == '-- ' or match):
                    # Before a new diff header the blank lines belong to the
                    # previous file; before the '-- ' signature separator no
                    # new header has been seen, so they belong to the current
                    # file
                    blank_fname = last_fname if match else fname
                    self.warn.append("Found possible blank line(s) at "
                                     "end of file '%s'" % blank_fname)
                outfd.write('+\n' * self.blank_count)
                outfd.write(out_line + '\n')
                self.blank_count = 0
        self.Finalize()


def GetMetaDataForList(commit_range, git_dir=None, count=None,
                       series=None, allow_overwrite=False):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and pulls out the tags we
    are interested in.

    Args:
        commit_range: Range of commits to count (e.g. 'HEAD..base')
        git_dir: Path to git repository (None to use default)
        count: Number of commits to list, or None for no limit
        series: Series object to add information into. By default a new series
            is started.
        allow_overwrite: Allow tags to overwrite an existing tag
    Returns:
        A Series object containing information about the commits.
    """
    # Compare against None so that a caller-supplied series is always the one
    # that gets filled in, even if it happens to evaluate as false
    if series is None:
        series = Series()
    series.allow_overwrite = allow_overwrite
    params = gitutil.LogCmd(commit_range, reverse=True, count=count,
                            git_dir=git_dir)
    stdout = command.RunPipe([params], capture=True).stdout
    ps = PatchStream(series, is_log=True)
    for line in stdout.splitlines():
        ps.ProcessLine(line)
    ps.Finalize()
    return series


def GetMetaData(start, count):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and pulls out the tags we
    are interested in.

    Args:
        start: Commit to start from: 0=HEAD, 1=next one, etc.
        count: Number of commits to list
    Returns:
        A Series object containing information about the commits.
    """
    return GetMetaDataForList('HEAD~%d' % start, None, count)


def FixPatch(backup_dir, fname, series, commit):
    """Fix up a patch file, by adding/removing as required.

    We remove our tags from the patch file, insert changes lists, etc.
    The patch file is processed in place, and overwritten.

    A backup file is put into backup_dir (if not None).

    Args:
        backup_dir: Directory to copy the original file into, or None
        fname: Filename to patch file to process
        series: Series information about this patch set
        commit: Commit object for this patch file
    Return:
        A list of errors, or [] if all ok.
    """
    handle, tmpname = tempfile.mkstemp()
    try:
        # Write the cleaned patch to a temporary file, so that the original
        # is left untouched if anything goes wrong part-way through
        with os.fdopen(handle, 'w') as outfd:
            with open(fname, 'r') as infd:
                ps = PatchStream(series)
                ps.commit = commit
                ps.ProcessStream(infd, outfd)

        # Create a backup file if required
        if backup_dir:
            shutil.copy(fname,
                        os.path.join(backup_dir, os.path.basename(fname)))
        shutil.move(tmpname, fname)
    finally:
        # After a successful move this is a no-op; on failure it stops the
        # temporary file from being left behind
        if os.path.exists(tmpname):
            os.remove(tmpname)
    return ps.warn


def FixPatches(series, fnames):
    """Fix up a list of patches identified by filenames

    The patch files are processed in place, and overwritten.

    Args:
        series: The series object
        fnames: List of patch files to process
    Returns:
        The series object which was passed in
    """
    # Current workflow creates patches, so we shouldn't need a backup
    backup_dir = None  # tempfile.mkdtemp('clean-patch')
    count = 0
    for count, fname in enumerate(fnames, 1):
        cmt = series.commits[count - 1]
        cmt.patch = fname
        result = FixPatch(backup_dir, fname, series, cmt)
        if result:
            # Single-argument print(...) gives the same output whether it is
            # parsed as a Python 2 statement or a Python 3 function call
            print('%d warnings for %s:' % (len(result), fname))
            for warn in result:
                print('\t%s' % warn)
            print('')
    print('Cleaned %d patches' % count)
    return series


def InsertCoverLetter(fname, series, count):
    """Inserts a cover letter with the required info into patch 0

    Args:
        fname: Input / output filename of the cover letter file
        series: Series object
        count: Number of patches in the series
    """
    with open(fname, 'r') as fd:
        lines = fd.readlines()

    text = series.cover
    prefix = series.GetPatchPrefix()

    # Pad the cover letter number to the width of the patch count, e.g. 00/12,
    # so that it lines up with the numbering of the patches themselves
    zero = '0' * len(str(count))

    # Build the complete new contents before opening the file for writing, so
    # that a failure here cannot leave a truncated cover letter behind
    out_lines = []
    for line in lines:
        if line.startswith('Subject:'):
            line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0])

        # Insert our cover letter
        elif line.startswith('*** BLURB HERE ***'):
            # First the blurb text
            line = '\n'.join(text[1:]) + '\n'
            if series.get('notes'):
                line += '\n'.join(series.notes) + '\n'

            # Now the change list
            out = series.MakeChangeLog(None)
            line += '\n' + '\n'.join(out)
        out_lines.append(line)

    with open(fname, 'w') as fd:
        fd.writelines(out_lines)