#! /usr/bin/env python3

# series.py
#
# Read a series' mbox file and get information about the patches
# contained
#
# Copyright (C) 2024 BayLibre SAS
#
# SPDX-License-Identifier: GPL-2.0-only
#

import email
import re


# From: https://stackoverflow.com/questions/59681461/read-a-big-mbox-file-with-python
class MboxReader:
    """Iterate over the messages of an mbox file, one message at a time.

    The file is read line by line, so only the message currently being
    assembled is held in memory. Use it as a context manager so that the
    underlying file handle is closed when done.
    """

    def __init__(self, filepath):
        """Open *filepath* and consume the leading ``From `` separator.

        Raises:
            ValueError: if the file does not begin with an mbox ``From ``
                separator line (this includes an empty file).
        """
        self.handle = open(filepath, 'rb')
        # The first separator must be read unconditionally: wrapping the
        # read in an `assert` would skip it entirely under `python -O`.
        first_line = self.handle.readline()
        if not first_line.startswith(b'From '):
            # Do not leak the handle when rejecting the file.
            self.handle.close()
            raise ValueError(
                "%s is not an mbox file: missing leading 'From ' line"
                % filepath)
        self._exhausted = False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.handle.close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next message as an ``email.message.Message``.

        Lines are collected until the next ``From `` separator or the end
        of the file; the final message is returned when the end of the
        file is reached, and StopIteration is raised on later calls.
        """
        if self._exhausted:
            raise StopIteration

        lines = []
        while True:
            line = self.handle.readline()
            if line == b'':
                # End of file: hand back what was collected, then stop.
                self._exhausted = True
                break
            if line.startswith(b'From '):
                break
            lines.append(line)

        return email.message_from_bytes(b''.join(lines))


class Patch:
    """A single patch email taken from a series' mbox file.

    Attributes are read straight from the parsed message:
    author/to/cc/subject come from the From/To/Cc/Subject headers (None
    when a header is absent); the payload is split on the first '---'
    into commit_message and diff.
    """

    def __init__(self, data):
        self.author = data['From']
        self.to = data['To']
        self.cc = data['Cc']
        self.subject = data['Subject']
        self.split_body = re.split('---', data.get_payload(), maxsplit=1)
        self.commit_message = self.split_body[0]
        # A body without a '---' separator has no diff part; use an empty
        # string rather than failing with IndexError.
        self.diff = self.split_body[1] if len(self.split_body) > 1 else ''


class PatchSeries:
    """All patches contained in one mbox file, plus series-level details.

    Attributes:
        patches: list of Patch objects, in file order.
        patch_count: number of patches in the series.
        path: the mbox file path the series was read from.
        branch: target branch parsed from the first patch's subject.
    """

    def __init__(self, filepath):
        with MboxReader(filepath) as mbox:
            self.patches = [Patch(message) for message in mbox]

        # Invariant: the reader always produces at least one message.
        assert self.patches
        self.patch_count = len(self.patches)
        self.path = filepath
        self.branch = self.get_branch()

    def get_branch(self):
        """Return the target branch named in the first patch's subject.

        The bracketed part of the subject (from the first '[' to the last
        ']') is stripped of its outer brackets and split on commas; the
        first entry accepted by valid_branch() is the branch, cut at the
        first ']'. When the subject is missing, has no bracketed tag, or
        has no acceptable entry, the patch is assumed to target "master".
        """
        # There should be at least one patch in the series and it should
        # include the branch name in the subject, so parse that
        subject = self.patches[0].subject
        if subject is None:
            return "master"

        # str() also covers header objects that are not plain strings.
        match = re.search(r"(\[.*\])", str(subject), re.DOTALL)
        if not match:
            return "master"

        prefix = match.group(1).strip('[]')
        for candidate in (entry.strip() for entry in prefix.split(',')):
            if PatchSeries.valid_branch(candidate):
                # Get the branch name excluding any brackets
                return candidate.split(']')[0]

        # No usable branch tag in the subject line
        return "master"

    @staticmethod
    def valid_branch(branch):
        """ Check if branch is valid name """
        lbranch = branch.lower()

        # Tags such as "PATCH", "RFC", "RESEND", "v2" or "1/3" describe the
        # submission itself, not a branch.
        if lbranch.startswith(('patch', 'rfc', 'resend')):
            return False
        if re.search(r'^v\d+', lbranch) or re.search(r'^\d+/\d+', lbranch):
            return False

        return True