xref: /openbmc/u-boot/scripts/mailmapper (revision c4e68d3aa8178f6aa63a79c4f8f459c0e3ed58e8)
1#!/usr/bin/env python2
2#
3# Copyright (C) 2014, Masahiro Yamada <yamada.m@jp.panasonic.com>
4#
5# SPDX-License-Identifier:	GPL-2.0+
6#
7
8'''
9A tool to create/update the mailmap file
10
11The command 'git shortlog' summarizes git log output in a format suitable
12for inclusion in release announcements. Each commit will be grouped by
13author and title.
14
15One problem is that the authors' name and/or email address is sometimes
16spelled differently. The .mailmap feature can be used to coalesce together
17commits by the same person.
18(See 'man git-shortlog' for further information on this feature.)
19
20This tool helps to create/update the mailmap file.
21
22It runs 'git shortlog' internally and searches differently spelled author
23names which share the same email address. The author name with the most
24commits is assumed to be a canonical real name. If the number of commits
25from the canonical name is equal to or greater than 'MIN_COMMITS',
26the entry for the canonical name will be output. ('MIN_COMMITS' is used
27here because we do not want to create a fat mailmap by adding every author
28with only a few commits.)
29
30If there exists a mailmap file specified by the mailmap.file configuration
31option or '.mailmap' at the toplevel of the repository, it is used as
32a base file. (The mailmap.file configuration takes precedence over the
33'.mailmap' file if both exist.)
34
35The base file and the newly added entries are merged together and sorted
36alphabetically (but the comment block is kept untouched), and then printed
37to standard output.
38
39Usage
40-----
41
42  scripts/mailmapper
43
44prints the mailmapping to standard output.
45
46  scripts/mailmapper > tmp; mv tmp .mailmap
47
48will be useful for updating '.mailmap' file.
49'''
50
51import sys
52import os
53import subprocess
54
# Only canonical names that have MIN_COMMITS or more commits get an entry.
# The threshold keeps the generated mailmap file from growing too large.
MIN_COMMITS = 50

# Ask git for the top directory of the working tree. git exits with a
# non-zero status outside a repository, which check_output() reports as
# CalledProcessError. The trailing '\n' of the output is stripped right away.
try:
    toplevel = subprocess.check_output(
        ['git', 'rev-parse', '--show-toplevel']).rstrip()
except subprocess.CalledProcessError:
    sys.exit('Please run in a git repository.')

# Work from the toplevel of the repository so that the relative path
# '.mailmap' used later resolves to the right file.
os.chdir(toplevel)
70
# First, create an 'author name' vs 'number of commits' database.
# The name with the most commits is later assumed to be the canonical
# real name.
#
# 'HEAD' is given explicitly: without a revision, 'git shortlog' reads the
# log from standard input whenever stdin is not a terminal (e.g. when this
# script is run from a pipe or a cron job) instead of summarizing the
# repository.
shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n', 'HEAD'])

# Maps each author name to its number of commits.
commits_per_name = {}

for line in shortlog.splitlines():
    try:
        commits, name = line.split(None, 1)
    except ValueError:
        # The line holds nothing but the commit count, i.e. the author name
        # is empty. Skip it: falling through would record the values left
        # over from the previous line again (or raise NameError if this is
        # the very first line).
        continue
    commits_per_name[name] = int(commits)
84
# Next, coalesce the author names which share the same email address.
#
# 'HEAD' is given explicitly: without a revision, 'git shortlog' reads the
# log from standard input whenever stdin is not a terminal.
shortlog = subprocess.check_output(
    ['git', 'shortlog', '-s', '-n', '-e', 'HEAD'])

# email address -> the author name with the most commits seen so far
mail_vs_name = {}
# email address -> canonical name; only the entries to be added to the mailmap
output = {}

for line in shortlog.splitlines():
    # line.rsplit(None, 1) is not safe because weird email addresses might
    # include whitespaces. Cut at the last '<' instead; unlike a plain
    # split('<') this cannot fail to unpack on an unexpected line.
    tmp, sep, mail = line.rpartition('<')
    if not sep:
        # No email part on this line, so there is nothing to coalesce.
        continue
    mail = '<' + mail.rstrip()
    try:
        _, name = tmp.rstrip().split(None, 1)
    except ValueError:
        # author name is empty
        name = ''
    if mail in mail_vs_name:
        # another name for the same email address
        prev_name = mail_vs_name[mail]
        # Take the name with more commits. Unknown (or empty) names count
        # as zero commits. max() returns the first maximal item, so on a
        # tie the name from the current line wins, as it always did.
        major_name = max([name, prev_name],
                         key=lambda x: commits_per_name.get(x, 0))
        mail_vs_name[mail] = major_name
        # An entry is emitted for MIN_COMMITS *or more* commits, as stated
        # in the module documentation.
        if commits_per_name.get(major_name, 0) >= MIN_COMMITS:
            output[mail] = major_name
    else:
        mail_vs_name[mail] = name
112
# Candidate base files, in order of precedence:
# [1] the file named by the mailmap.file configuration option, if it is set;
# [2] the file .mailmap at the toplevel of the repository.
# If neither can be opened later on, a new mailmap is created from scratch.
try:
    # 'git config' exits with a non-zero status when the option is unset.
    config_mailmap = subprocess.check_output(
        ['git', 'config', 'mailmap.file']).rstrip()
except subprocess.CalledProcessError:
    config_mailmap = ''

if config_mailmap:
    mailmap_files = [config_mailmap, '.mailmap']
else:
    mailmap_files = ['.mailmap']
129
# Open the first candidate base file that exists and is readable.
# 'infile' stays None when there is no usable base file.
infile = None

for map_file in mailmap_files:
    try:
        infile = open(map_file)
    except IOError:
        # Failed to open. Try next.
        # (Only I/O failures are ignored; anything else, such as
        # KeyboardInterrupt, is allowed to propagate.)
        continue
    break

# Leading run of comment ('#') and blank lines of the base file; it is
# printed first and kept in its original order.
comment_block = []
# Every other line of the base file; these get sorted with the new entries.
output_lines = []

if infile is not None:
    # The 'with' statement closes the file even if reading fails.
    with infile:
        in_comment_block = True
        for line in infile:
            # The last line of a file may lack its newline. Add it so the
            # line does not get glued to its neighbour once the entries
            # are sorted and joined.
            if not line.endswith('\n'):
                line += '\n'
            if in_comment_block and (line[0] == '#' or line[0] == '\n'):
                comment_block.append(line)
            else:
                # The first real entry ends the comment block; comments and
                # blank lines after it are treated as ordinary lines.
                in_comment_block = False
                output_lines.append(line)
153
# Add one 'Canonical Name <email>' line per newly found entry, sort them
# together with the lines taken from the base file, and print the result
# after the (unsorted) leading comment block.
output_lines += [name + ' ' + mail + '\n' for mail, name in output.items()]
output_lines.sort()

sys.stdout.writelines(comment_block + output_lines)
160