1#!/usr/bin/env python2
2# SPDX-License-Identifier: GPL-2.0+
3#
4# Copyright (C) 2014, Masahiro Yamada <yamada.m@jp.panasonic.com>
5
6'''
7A tool to create/update the mailmap file
8
9The command 'git shortlog' summarizes git log output in a format suitable
10for inclusion in release announcements. Each commit will be grouped by
11author and title.
12
One problem is that the authors' name and/or email address is sometimes
spelled differently. The .mailmap feature can be used to coalesce together
commits by the same person.
(See 'man git-shortlog' for further information on this feature.)
17
18This tool helps to create/update the mailmap file.
19
It runs 'git shortlog' internally and searches for differently spelled
author names which share the same email address. The author name with the
most commits is assumed to be the canonical real name. If the number of
commits from the canonical name is equal to or greater than 'MIN_COMMITS',
the entry for the canonical name will be output. ('MIN_COMMITS' is used
here because we do not want to create a fat mailmap by adding every author
with only a few commits.)
27
If there exists a mailmap file specified by the mailmap.file configuration
option or '.mailmap' at the toplevel of the repository, it is used as
a base file. (The mailmap.file configuration takes precedence over the
'.mailmap' file if both exist.)
32
33The base file and the newly added entries are merged together and sorted
34alphabetically (but the comment block is kept untouched), and then printed
35to standard output.
36
37Usage
38-----
39
40  scripts/mailmapper
41
42prints the mailmapping to standard output.
43
44  scripts/mailmapper > tmp; mv tmp .mailmap
45
46will be useful for updating '.mailmap' file.
47'''
48
49import sys
50import os
51import subprocess
52
# Threshold on the number of commits of a canonical name: entries are
# emitted only for canonical names with MIN_COMMITS or more commits.
# This limitation is used so as not to create a too big mailmap file.
MIN_COMMITS = 50
56
# Ask git where the toplevel of the working tree is.  The output ends with
# a newline, which is stripped right away.  A failing 'git rev-parse' is
# reported as "not inside a git repository".
try:
    toplevel = subprocess.check_output(
        ['git', 'rev-parse', '--show-toplevel']).rstrip()
except subprocess.CalledProcessError:
    sys.exit('Please run in a git repository.')

# Work from the toplevel of the repository from here on, so that relative
# paths such as '.mailmap' resolve against it.
os.chdir(toplevel)
68
# First, create 'author name' vs 'number of commits' database.
# We assume the name with the most commits as the canonical real name.
shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n'])

# Maps an author name to the number of commits reported for that name.
commits_per_name = {}

for line in shortlog.splitlines():
    # Each line is split at the first run of whitespace into the commit
    # count and the author name.
    try:
        commits, name = line.split(None, 1)
    except ValueError:
        # Ignore lines with an empty author name.  The line must really be
        # skipped: falling through would reuse 'commits' and 'name' left
        # over from the previous iteration, or raise NameError if this is
        # the very first line.
        continue
    commits_per_name[name] = int(commits)
82
# Next, coalesce the author names with the same email address
shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n', '-e'])

# mail_vs_name: email address -> name with the most commits seen so far.
# output: email address -> canonical name, only for addresses that were
#         seen with more than one name and whose canonical name has
#         MIN_COMMITS or more commits.
mail_vs_name = {}
output = {}

for line in shortlog.splitlines():
    # tmp, mail = line.rsplit(None, 1) is not safe
    # because weird email addresses might include whitespaces.
    # Split at the last '<' instead, so that a line with an unexpected
    # number of '<' characters does not abort the whole run.
    tmp, sep, mail = line.rpartition('<')
    if not sep:
        # No email part on this line; nothing to coalesce.
        continue
    mail = '<' + mail.rstrip()
    try:
        _, name = tmp.rstrip().split(None, 1)
    except ValueError:
        # author name is empty
        name = ''
    if mail not in mail_vs_name:
        mail_vs_name[mail] = name
        continue
    # another name for the same email address
    prev_name = mail_vs_name[mail]
    # Names without a recorded commit count (such as the empty name)
    # count as zero commits.
    prev_commits = commits_per_name.get(prev_name, 0)
    commits = commits_per_name.get(name, 0)
    # Take the name with more commits; on a tie the later name wins.
    if prev_commits > commits:
        major_name, major_commits = prev_name, prev_commits
    else:
        major_name, major_commits = name, commits
    mail_vs_name[mail] = major_name
    # "equal to or greater than MIN_COMMITS", as the module documentation
    # states.
    if major_commits >= MIN_COMMITS:
        output[mail] = major_name
110
# [1] If there exists a mailmap file at the location pointed to
#     by the mailmap.file configuration option, update it.
# [2] If the file .mailmap exists at the toplevel of the repository, update it.
# [3] Otherwise, create a new mailmap file.
#
# Candidate base files, in order of precedence.
mailmap_files = []

try:
    config_mailmap = subprocess.check_output(['git', 'config', 'mailmap.file'])
except subprocess.CalledProcessError:
    # A failing 'git config' is treated as "option not set".
    config_mailmap = ''

# strip '\n'
config_mailmap = config_mailmap.rstrip()
if config_mailmap:
    mailmap_files.append(config_mailmap)

mailmap_files.append('.mailmap')

# File object of the first candidate that could be opened, or None if
# there is no base file at all.
infile = None

for map_file in mailmap_files:
    try:
        infile = open(map_file)
    except (IOError, OSError):
        # Failed to open. Try next.  Only I/O errors are swallowed here,
        # so that e.g. KeyboardInterrupt still terminates the script.
        continue
    break
137
# comment_block: the leading run of comment and blank lines of the base
#                file; it is kept in its original order.
# output_lines:  every remaining line of the base file.
comment_block = []
output_lines = []

if infile:
    # The file is closed on leaving the 'with' block, even if reading fails.
    with infile:
        in_comment_block = True
        for line in infile:
            # The last line of the file may lack its newline.  Add it so
            # that the line cannot run into another one when the lines are
            # concatenated later.
            if not line.endswith('\n'):
                line += '\n'
            if in_comment_block and line.startswith(('#', '\n')):
                comment_block.append(line)
            else:
                # The first line that is neither a comment nor blank ends
                # the comment block; everything from here on is an entry.
                in_comment_block = False
                output_lines.append(line)
151
# Turn every collected (email address, canonical name) pair into a
# 'name <mail>' line, merge these lines with the ones taken from the base
# file and sort the result.  The comment block is not sorted; it is
# printed first, as it is.
new_entries = [name + ' ' + mail + '\n' for mail, name in output.items()]
output_lines = sorted(output_lines + new_entries)

sys.stdout.write(''.join(comment_block) + ''.join(output_lines))
158