#!/usr/bin/env python
#
# Copyright (C) 2014, Masahiro Yamada <yamada.m@jp.panasonic.com>
#
# SPDX-License-Identifier: GPL-2.0+
#

'''
A tool to create/update the mailmap file

The command 'git shortlog' summarizes git log output in a format suitable
for inclusion in release announcements. Each commit will be grouped by
author and title.

One problem is that the authors' name and/or email address is sometimes
spelled differently. The .mailmap feature can be used to coalesce together
commits by the same person.
(See 'man git-shortlog' for further information on this feature.)

This tool helps to create/update the mailmap file.

It runs 'git shortlog' internally and searches for differently spelled
author names which share the same email address. The author name with the
most commits is assumed to be a canonical real name. If the number of commits
from the canonical name is equal to or greater than 'MIN_COMMITS',
the entry for the canonical name will be output. ('MIN_COMMITS' is used
here because we do not want to create a fat mailmap by adding every author
with only a few commits.)

If there exists a mailmap file specified by the mailmap.file configuration
option or '.mailmap' at the toplevel of the repository, it is used as
a base file. (The mailmap.file configuration takes precedence over the
'.mailmap' file if both exist.)

The base file and the newly added entries are merged together and sorted
alphabetically (but the comment block is kept untouched), and then printed
to standard output.

Usage
-----

    scripts/mailmapper

prints the mailmapping to standard output.

    scripts/mailmapper > tmp; mv tmp .mailmap

will be useful for updating '.mailmap' file.
'''

import sys
import os
import subprocess

# Entries are output only for canonical names with MIN_COMMITS or more commits.
# This limitation is used so as not to create a too big mailmap file.
MIN_COMMITS = 50


def git_output(*args):
    '''Run 'git <args>' and return its standard output as a native string.

    universal_newlines=True makes check_output() return text instead of
    bytes on Python 3; on Python 2 the result is still a plain str.

    Raises subprocess.CalledProcessError if git exits with a non-zero status.
    '''
    return subprocess.check_output(['git'] + list(args),
                                   universal_newlines=True)


def count_commits_per_name(shortlog):
    '''Build the 'author name' vs 'number of commits' database.

    shortlog is the output of 'git shortlog -s -n': one line per author,
    a commit count followed by the author name.

    Returns a dict mapping each author name to its commit count.
    Lines without an author name are skipped.
    '''
    commits_per_name = {}

    for line in shortlog.splitlines():
        try:
            commits, name = line.split(None, 1)
        except ValueError:
            # The author name is empty (or the line is blank), so there is
            # nothing to use as a key.  Skip the line instead of falling
            # through with stale or undefined 'commits' and 'name'.
            continue
        commits_per_name[name] = int(commits)

    return commits_per_name


def pick_canonical_names(shortlog, commits_per_name, min_commits=MIN_COMMITS):
    '''Coalesce the author names which share the same email address.

    shortlog is the output of 'git shortlog -s -n -e': one line per
    (author name, email) pair in the form 'count name <email>'.
    commits_per_name is the table built by count_commits_per_name().

    For every email address used with more than one spelling of the author
    name, the name with the most commits is taken as the canonical name.

    Returns a dict mapping '<email>' to the canonical name, containing only
    the canonical names with min_commits or more commits.
    '''
    def total(name):
        # An empty name has no entry in the table and counts as zero.
        return commits_per_name.get(name, 0)

    mail_vs_name = {}
    output = {}

    for line in shortlog.splitlines():
        # tmp, mail = line.rsplit(None, 1) is not safe
        # because weird email addresses might include whitespaces
        tmp, mail = line.split('<')
        mail = '<' + mail.rstrip()
        try:
            _, name = tmp.rstrip().split(None, 1)
        except ValueError:
            # author name is empty
            name = ''

        if mail not in mail_vs_name:
            mail_vs_name[mail] = name
            continue

        # Another name for the same email address: take the name with more
        # commits.  On a tie the name seen later wins.
        prev_name = mail_vs_name[mail]
        major_name = name if total(name) >= total(prev_name) else prev_name
        mail_vs_name[mail] = major_name
        # "MIN_COMMITS or more", so the comparison must be inclusive.
        if total(major_name) >= min_commits:
            output[mail] = major_name

    return output


def read_base_mailmap(candidates):
    '''Return the lines of the first file in candidates that can be opened.

    candidates is a list of paths tried in order.  Returns an empty list
    if none of them can be opened.
    '''
    for map_file in candidates:
        try:
            infile = open(map_file)
        except (IOError, OSError):
            # Failed to open. Try next.
            continue
        # Close the file even if reading it raises.
        with infile:
            return infile.readlines()

    return []


def merge_mailmap(base_lines, new_entries):
    '''Merge the base mailmap lines with the newly found entries.

    base_lines is the list of lines of the existing mailmap file (may be
    empty).  new_entries maps '<email>' to the canonical author name.

    The leading block of comment lines ('#') and blank lines is kept
    untouched at the top.  Every other line, together with the new entries,
    is sorted alphabetically.

    Returns the whole mailmap as a single string.
    '''
    comment_block = []
    output_lines = []
    in_comment_block = True

    for line in base_lines:
        # The last line of the file may lack a newline; add it so that the
        # line is not fused with its neighbour after sorting.
        if not line.endswith('\n'):
            line += '\n'
        if in_comment_block and line[0] in ('#', '\n'):
            comment_block.append(line)
        else:
            in_comment_block = False
            output_lines.append(line)

    output_lines.extend(name + ' ' + mail + '\n'
                        for mail, name in new_entries.items())
    output_lines.sort()

    return ''.join(comment_block + output_lines)


def main():
    '''Print the created/updated mailmap to standard output.'''
    try:
        toplevel = git_output('rev-parse', '--show-toplevel')
    except subprocess.CalledProcessError:
        # Prints the message to standard error and exits with status 1.
        sys.exit('Please run in a git repository.')

    # strip '\n'
    toplevel = toplevel.rstrip()

    # Change the current working directory to the toplevel of the repository
    # for our easier life.
    os.chdir(toplevel)

    # First, create 'author name' vs 'number of commits' database.
    # We assume the name with the most commits as the canonical real name.
    commits_per_name = count_commits_per_name(
        git_output('shortlog', '-s', '-n'))

    # Next, coalesce the author names with the same email address
    output = pick_canonical_names(
        git_output('shortlog', '-s', '-n', '-e'), commits_per_name)

    # [1] If there exists a mailmap file at the location pointed to
    #     by the mailmap.file configuration option, update it.
    # [2] If the file .mailmap exists at the toplevel of the repository,
    #     update it.
    # [3] Otherwise, create a new mailmap file.
    mailmap_files = []

    try:
        config_mailmap = git_output('config', 'mailmap.file')
    except subprocess.CalledProcessError:
        # The option is not set.
        config_mailmap = ''

    config_mailmap = config_mailmap.rstrip()
    if config_mailmap:
        mailmap_files.append(config_mailmap)

    mailmap_files.append('.mailmap')

    sys.stdout.write(merge_mailmap(read_base_mailmap(mailmap_files), output))


if __name__ == '__main__':
    main()