xref: /openbmc/linux/scripts/get_maintainer.pl (revision 9a6b55ac)
1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
4# (c) 2007, Joe Perches <joe@perches.com>
5#           created from checkpatch.pl
6#
7# Print selected MAINTAINERS information for
8# the files modified in a patch or for a file
9#
10# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
11#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
12
13use warnings;
14use strict;
15
16my $P = $0;
17my $V = '0.26';
18
19use Getopt::Long qw(:config no_auto_abbrev);
20use Cwd;
21use File::Find;
22
23my $cur_path = fastgetcwd() . '/';
24my $lk_path = "./";
25my $email = 1;
26my $email_usename = 1;
27my $email_maintainer = 1;
28my $email_reviewer = 1;
29my $email_fixes = 1;
30my $email_list = 1;
31my $email_moderated_list = 1;
32my $email_subscriber_list = 0;
33my $email_git_penguin_chiefs = 0;
34my $email_git = 0;
35my $email_git_all_signature_types = 0;
36my $email_git_blame = 0;
37my $email_git_blame_signatures = 1;
38my $email_git_fallback = 1;
39my $email_git_min_signatures = 1;
40my $email_git_max_maintainers = 5;
41my $email_git_min_percent = 5;
42my $email_git_since = "1-year-ago";
43my $email_hg_since = "-365";
44my $interactive = 0;
45my $email_remove_duplicates = 1;
46my $email_use_mailmap = 1;
47my $output_multiline = 1;
48my $output_separator = ", ";
49my $output_roles = 0;
50my $output_rolestats = 1;
51my $output_section_maxlen = 50;
52my $scm = 0;
53my $tree = 1;
54my $web = 0;
55my $subsystem = 0;
56my $status = 0;
57my $letters = "";
58my $keywords = 1;
59my $sections = 0;
60my $file_emails = 0;
61my $from_filename = 0;
62my $pattern_depth = 0;
63my $self_test = undef;
64my $version = 0;
65my $help = 0;
66my $find_maintainer_files = 0;
67my $maintainer_path;
68my $vcs_used = 0;
69
70my $exit = 0;
71
72my %commit_author_hash;
73my %commit_signer_hash;
74
75my @penguin_chief = ();
76push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
77#Andrew wants in on most everything - 2009/01/14
78#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
79
80my @penguin_chief_names = ();
81foreach my $chief (@penguin_chief) {
82    if ($chief =~ m/^(.*):(.*)/) {
83	my $chief_name = $1;
84	my $chief_addr = $2;
85	push(@penguin_chief_names, $chief_name);
86    }
87}
88my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
89
90# Signature types of people who are either
91# 	a) responsible for the code in question, or
92# 	b) familiar enough with it to give relevant feedback
93my @signature_tags = ();
94push(@signature_tags, "Signed-off-by:");
95push(@signature_tags, "Reviewed-by:");
96push(@signature_tags, "Acked-by:");
97
98my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
99
100# rfc822 email address - preloaded methods go here.
101my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
102my $rfc822_char = '[\\000-\\377]';
103
104# VCS command support: class-like functions and strings
105
106my %VCS_cmds;
107
108my %VCS_cmds_git = (
109    "execute_cmd" => \&git_execute_cmd,
110    "available" => '(which("git") ne "") && (-e ".git")',
111    "find_signers_cmd" =>
112	"git log --no-color --follow --since=\$email_git_since " .
113	    '--numstat --no-merges ' .
114	    '--format="GitCommit: %H%n' .
115		      'GitAuthor: %an <%ae>%n' .
116		      'GitDate: %aD%n' .
117		      'GitSubject: %s%n' .
118		      '%b%n"' .
119	    " -- \$file",
120    "find_commit_signers_cmd" =>
121	"git log --no-color " .
122	    '--numstat ' .
123	    '--format="GitCommit: %H%n' .
124		      'GitAuthor: %an <%ae>%n' .
125		      'GitDate: %aD%n' .
126		      'GitSubject: %s%n' .
127		      '%b%n"' .
128	    " -1 \$commit",
129    "find_commit_author_cmd" =>
130	"git log --no-color " .
131	    '--numstat ' .
132	    '--format="GitCommit: %H%n' .
133		      'GitAuthor: %an <%ae>%n' .
134		      'GitDate: %aD%n' .
135		      'GitSubject: %s%n"' .
136	    " -1 \$commit",
137    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
138    "blame_file_cmd" => "git blame -l \$file",
139    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
140    "blame_commit_pattern" => "^([0-9a-f]+) ",
141    "author_pattern" => "^GitAuthor: (.*)",
142    "subject_pattern" => "^GitSubject: (.*)",
143    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
144    "file_exists_cmd" => "git ls-files \$file",
145    "list_files_cmd" => "git ls-files \$file",
146);
147
148my %VCS_cmds_hg = (
149    "execute_cmd" => \&hg_execute_cmd,
150    "available" => '(which("hg") ne "") && (-d ".hg")',
151    "find_signers_cmd" =>
152	"hg log --date=\$email_hg_since " .
153	    "--template='HgCommit: {node}\\n" .
154	                "HgAuthor: {author}\\n" .
155			"HgSubject: {desc}\\n'" .
156	    " -- \$file",
157    "find_commit_signers_cmd" =>
158	"hg log " .
159	    "--template='HgSubject: {desc}\\n'" .
160	    " -r \$commit",
161    "find_commit_author_cmd" =>
162	"hg log " .
163	    "--template='HgCommit: {node}\\n" .
164		        "HgAuthor: {author}\\n" .
165			"HgSubject: {desc|firstline}\\n'" .
166	    " -r \$commit",
167    "blame_range_cmd" => "",		# not supported
168    "blame_file_cmd" => "hg blame -n \$file",
169    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
170    "blame_commit_pattern" => "^([ 0-9a-f]+):",
171    "author_pattern" => "^HgAuthor: (.*)",
172    "subject_pattern" => "^HgSubject: (.*)",
173    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
174    "file_exists_cmd" => "hg files \$file",
175    "list_files_cmd" => "hg manifest -R \$file",
176);
177
178my $conf = which_conf(".get_maintainer.conf");
179if (-f $conf) {
180    my @conf_args;
181    open(my $conffile, '<', "$conf")
182	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
183
184    while (<$conffile>) {
185	my $line = $_;
186
187	$line =~ s/\s*\n?$//g;
188	$line =~ s/^\s*//g;
189	$line =~ s/\s+/ /g;
190
191	next if ($line =~ m/^\s*#/);
192	next if ($line =~ m/^\s*$/);
193
194	my @words = split(" ", $line);
195	foreach my $word (@words) {
196	    last if ($word =~ m/^#/);
197	    push (@conf_args, $word);
198	}
199    }
200    close($conffile);
201    unshift(@ARGV, @conf_args) if @conf_args;
202}
203
204my @ignore_emails = ();
205my $ignore_file = which_conf(".get_maintainer.ignore");
206if (-f $ignore_file) {
207    open(my $ignore, '<', "$ignore_file")
208	or warn "$P: Can't find a readable .get_maintainer.ignore file $!\n";
209    while (<$ignore>) {
210	my $line = $_;
211
212	$line =~ s/\s*\n?$//;
213	$line =~ s/^\s*//;
214	$line =~ s/\s+$//;
215	$line =~ s/#.*$//;
216
217	next if ($line =~ m/^\s*$/);
218	if (rfc822_valid($line)) {
219	    push(@ignore_emails, $line);
220	}
221    }
222    close($ignore);
223}
224
225if ($#ARGV > 0) {
226    foreach (@ARGV) {
227        if ($_ =~ /^-{1,2}self-test(?:=|$)/) {
228            die "$P: using --self-test does not allow any other option or argument\n";
229        }
230    }
231}
232
233if (!GetOptions(
234		'email!' => \$email,
235		'git!' => \$email_git,
236		'git-all-signature-types!' => \$email_git_all_signature_types,
237		'git-blame!' => \$email_git_blame,
238		'git-blame-signatures!' => \$email_git_blame_signatures,
239		'git-fallback!' => \$email_git_fallback,
240		'git-chief-penguins!' => \$email_git_penguin_chiefs,
241		'git-min-signatures=i' => \$email_git_min_signatures,
242		'git-max-maintainers=i' => \$email_git_max_maintainers,
243		'git-min-percent=i' => \$email_git_min_percent,
244		'git-since=s' => \$email_git_since,
245		'hg-since=s' => \$email_hg_since,
246		'i|interactive!' => \$interactive,
247		'remove-duplicates!' => \$email_remove_duplicates,
248		'mailmap!' => \$email_use_mailmap,
249		'm!' => \$email_maintainer,
250		'r!' => \$email_reviewer,
251		'n!' => \$email_usename,
252		'l!' => \$email_list,
253		'fixes!' => \$email_fixes,
254		'moderated!' => \$email_moderated_list,
255		's!' => \$email_subscriber_list,
256		'multiline!' => \$output_multiline,
257		'roles!' => \$output_roles,
258		'rolestats!' => \$output_rolestats,
259		'separator=s' => \$output_separator,
260		'subsystem!' => \$subsystem,
261		'status!' => \$status,
262		'scm!' => \$scm,
263		'tree!' => \$tree,
264		'web!' => \$web,
265		'letters=s' => \$letters,
266		'pattern-depth=i' => \$pattern_depth,
267		'k|keywords!' => \$keywords,
268		'sections!' => \$sections,
269		'fe|file-emails!' => \$file_emails,
270		'f|file' => \$from_filename,
271		'find-maintainer-files' => \$find_maintainer_files,
272		'mpath|maintainer-path=s' => \$maintainer_path,
273		'self-test:s' => \$self_test,
274		'v|version' => \$version,
275		'h|help|usage' => \$help,
276		)) {
277    die "$P: invalid argument - use --help if necessary\n";
278}
279
280if ($help != 0) {
281    usage();
282    exit 0;
283}
284
285if ($version != 0) {
286    print("${P} ${V}\n");
287    exit 0;
288}
289
290if (defined $self_test) {
291    read_all_maintainer_files();
292    self_test();
293    exit 0;
294}
295
296if (-t STDIN && !@ARGV) {
297    # We're talking to a terminal, but have no command line arguments.
298    die "$P: missing patchfile or -f file - use --help if necessary\n";
299}
300
301$output_multiline = 0 if ($output_separator ne ", ");
302$output_rolestats = 1 if ($interactive);
303$output_roles = 1 if ($output_rolestats);
304
305if ($sections || $letters ne "") {
306    $sections = 1;
307    $email = 0;
308    $email_list = 0;
309    $scm = 0;
310    $status = 0;
311    $subsystem = 0;
312    $web = 0;
313    $keywords = 0;
314    $interactive = 0;
315} else {
316    my $selections = $email + $scm + $status + $subsystem + $web;
317    if ($selections == 0) {
318	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
319    }
320}
321
322if ($email &&
323    ($email_maintainer + $email_reviewer +
324     $email_list + $email_subscriber_list +
325     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
326    die "$P: Please select at least 1 email option\n";
327}
328
329if ($tree && !top_of_kernel_tree($lk_path)) {
330    die "$P: The current directory does not appear to be "
331	. "a linux kernel source tree.\n";
332}
333
334## Read MAINTAINERS for type/value pairs
335
336my @typevalue = ();
337my %keyword_hash;
338my @mfiles = ();
339my @self_test_info = ();
340
341sub read_maintainer_file {
342    my ($file) = @_;
343
344    open (my $maint, '<', "$file")
345	or die "$P: Can't open MAINTAINERS file '$file': $!\n";
346    my $i = 1;
347    while (<$maint>) {
348	my $line = $_;
349	chomp $line;
350
351	if ($line =~ m/^([A-Z]):\s*(.*)/) {
352	    my $type = $1;
353	    my $value = $2;
354
355	    ##Filename pattern matching
356	    if ($type eq "F" || $type eq "X") {
357		$value =~ s@\.@\\\.@g;       ##Convert . to \.
358		$value =~ s/\*/\.\*/g;       ##Convert * to .*
359		$value =~ s/\?/\./g;         ##Convert ? to .
360		##if pattern is a directory and it lacks a trailing slash, add one
361		if ((-d $value)) {
362		    $value =~ s@([^/])$@$1/@;
363		}
364	    } elsif ($type eq "K") {
365		$keyword_hash{@typevalue} = $value;
366	    }
367	    push(@typevalue, "$type:$value");
368	} elsif (!(/^\s*$/ || /^\s*\#/)) {
369	    push(@typevalue, $line);
370	}
371	if (defined $self_test) {
372	    push(@self_test_info, {file=>$file, linenr=>$i, line=>$line});
373	}
374	$i++;
375    }
376    close($maint);
377}
378
379sub find_is_maintainer_file {
380    my ($file) = $_;
381    return if ($file !~ m@/MAINTAINERS$@);
382    $file = $File::Find::name;
383    return if (! -f $file);
384    push(@mfiles, $file);
385}
386
387sub find_ignore_git {
388    return grep { $_ !~ /^\.git$/; } @_;
389}
390
391read_all_maintainer_files();
392
393sub read_all_maintainer_files {
394    my $path = "${lk_path}MAINTAINERS";
395    if (defined $maintainer_path) {
396	$path = $maintainer_path;
397	# Perl Cookbook tilde expansion if necessary
398	$path =~ s@^~([^/]*)@ $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($<))[7])@ex;
399    }
400
401    if (-d $path) {
402	$path .= '/' if ($path !~ m@/$@);
403	if ($find_maintainer_files) {
404	    find( { wanted => \&find_is_maintainer_file,
405		    preprocess => \&find_ignore_git,
406		    no_chdir => 1,
407		}, "$path");
408	} else {
409	    opendir(DIR, "$path") or die $!;
410	    my @files = readdir(DIR);
411	    closedir(DIR);
412	    foreach my $file (@files) {
413		push(@mfiles, "$path$file") if ($file !~ /^\./);
414	    }
415	}
416    } elsif (-f "$path") {
417	push(@mfiles, "$path");
418    } else {
419	die "$P: MAINTAINER file not found '$path'\n";
420    }
421    die "$P: No MAINTAINER files found in '$path'\n" if (scalar(@mfiles) == 0);
422    foreach my $file (@mfiles) {
423	read_maintainer_file("$file");
424    }
425}
426
427#
428# Read mail address map
429#
430
431my $mailmap;
432
433read_mailmap();
434
435sub read_mailmap {
436    $mailmap = {
437	names => {},
438	addresses => {}
439    };
440
441    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
442
443    open(my $mailmap_file, '<', "${lk_path}.mailmap")
444	or warn "$P: Can't open .mailmap: $!\n";
445
446    while (<$mailmap_file>) {
447	s/#.*$//; #strip comments
448	s/^\s+|\s+$//g; #trim
449
450	next if (/^\s*$/); #skip empty lines
451	#entries have one of the following formats:
452	# name1 <mail1>
453	# <mail1> <mail2>
454	# name1 <mail1> <mail2>
455	# name1 <mail1> name2 <mail2>
456	# (see man git-shortlog)
457
458	if (/^([^<]+)<([^>]+)>$/) {
459	    my $real_name = $1;
460	    my $address = $2;
461
462	    $real_name =~ s/\s+$//;
463	    ($real_name, $address) = parse_email("$real_name <$address>");
464	    $mailmap->{names}->{$address} = $real_name;
465
466	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
467	    my $real_address = $1;
468	    my $wrong_address = $2;
469
470	    $mailmap->{addresses}->{$wrong_address} = $real_address;
471
472	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
473	    my $real_name = $1;
474	    my $real_address = $2;
475	    my $wrong_address = $3;
476
477	    $real_name =~ s/\s+$//;
478	    ($real_name, $real_address) =
479		parse_email("$real_name <$real_address>");
480	    $mailmap->{names}->{$wrong_address} = $real_name;
481	    $mailmap->{addresses}->{$wrong_address} = $real_address;
482
483	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
484	    my $real_name = $1;
485	    my $real_address = $2;
486	    my $wrong_name = $3;
487	    my $wrong_address = $4;
488
489	    $real_name =~ s/\s+$//;
490	    ($real_name, $real_address) =
491		parse_email("$real_name <$real_address>");
492
493	    $wrong_name =~ s/\s+$//;
494	    ($wrong_name, $wrong_address) =
495		parse_email("$wrong_name <$wrong_address>");
496
497	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
498	    $mailmap->{names}->{$wrong_email} = $real_name;
499	    $mailmap->{addresses}->{$wrong_email} = $real_address;
500	}
501    }
502    close($mailmap_file);
503}
504
505## use the filenames on the command line or find the filenames in the patchfiles
506
507my @files = ();
508my @fixes = ();			# If a patch description includes Fixes: lines
509my @range = ();
510my @keyword_tvi = ();
511my @file_emails = ();
512
513if (!@ARGV) {
514    push(@ARGV, "&STDIN");
515}
516
517foreach my $file (@ARGV) {
518    if ($file ne "&STDIN") {
519	##if $file is a directory and it lacks a trailing slash, add one
520	if ((-d $file)) {
521	    $file =~ s@([^/])$@$1/@;
522	} elsif (!(-f $file)) {
523	    die "$P: file '${file}' not found\n";
524	}
525    }
526    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
527	$file =~ s/^\Q${cur_path}\E//;	#strip any absolute path
528	$file =~ s/^\Q${lk_path}\E//;	#or the path to the lk tree
529	push(@files, $file);
530	if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
531	    open(my $f, '<', $file)
532		or die "$P: Can't open $file: $!\n";
533	    my $text = do { local($/) ; <$f> };
534	    close($f);
535	    if ($keywords) {
536		foreach my $line (keys %keyword_hash) {
537		    if ($text =~ m/$keyword_hash{$line}/x) {
538			push(@keyword_tvi, $line);
539		    }
540		}
541	    }
542	    if ($file_emails) {
543		my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
544		push(@file_emails, clean_file_emails(@poss_addr));
545	    }
546	}
547    } else {
548	my $file_cnt = @files;
549	my $lastfile;
550
551	open(my $patch, "< $file")
552	    or die "$P: Can't open $file: $!\n";
553
554	# We can check arbitrary information before the patch
555	# like the commit message, mail headers, etc...
556	# This allows us to match arbitrary keywords against any part
557	# of a git format-patch generated file (subject tags, etc...)
558
559	my $patch_prefix = "";			#Parsing the intro
560
561	while (<$patch>) {
562	    my $patch_line = $_;
563	    if (m/^ mode change [0-7]+ => [0-7]+ (\S+)\s*$/) {
564		my $filename = $1;
565		push(@files, $filename);
566	    } elsif (m/^rename (?:from|to) (\S+)\s*$/) {
567		my $filename = $1;
568		push(@files, $filename);
569	    } elsif (m/^diff --git a\/(\S+) b\/(\S+)\s*$/) {
570		my $filename1 = $1;
571		my $filename2 = $2;
572		push(@files, $filename1);
573		push(@files, $filename2);
574	    } elsif (m/^Fixes:\s+([0-9a-fA-F]{6,40})/) {
575		push(@fixes, $1) if ($email_fixes);
576	    } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
577		my $filename = $1;
578		$filename =~ s@^[^/]*/@@;
579		$filename =~ s@\n@@;
580		$lastfile = $filename;
581		push(@files, $filename);
582		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
583	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
584		if ($email_git_blame) {
585		    push(@range, "$lastfile:$1:$2");
586		}
587	    } elsif ($keywords) {
588		foreach my $line (keys %keyword_hash) {
589		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
590			push(@keyword_tvi, $line);
591		    }
592		}
593	    }
594	}
595	close($patch);
596
597	if ($file_cnt == @files) {
598	    warn "$P: file '${file}' doesn't appear to be a patch.  "
599		. "Add -f to options?\n";
600	}
601	@files = sort_and_uniq(@files);
602    }
603}
604
605@file_emails = uniq(@file_emails);
606@fixes = uniq(@fixes);
607
608my %email_hash_name;
609my %email_hash_address;
610my @email_to = ();
611my %hash_list_to;
612my @list_to = ();
613my @scm = ();
614my @web = ();
615my @subsystem = ();
616my @status = ();
617my %deduplicate_name_hash = ();
618my %deduplicate_address_hash = ();
619
620my @maintainers = get_maintainers();
621if (@maintainers) {
622    @maintainers = merge_email(@maintainers);
623    output(@maintainers);
624}
625
626if ($scm) {
627    @scm = uniq(@scm);
628    output(@scm);
629}
630
631if ($status) {
632    @status = uniq(@status);
633    output(@status);
634}
635
636if ($subsystem) {
637    @subsystem = uniq(@subsystem);
638    output(@subsystem);
639}
640
641if ($web) {
642    @web = uniq(@web);
643    output(@web);
644}
645
646exit($exit);
647
648sub self_test {
649    my @lsfiles = ();
650    my @good_links = ();
651    my @bad_links = ();
652    my @section_headers = ();
653    my $index = 0;
654
655    @lsfiles = vcs_list_files($lk_path);
656
657    for my $x (@self_test_info) {
658	$index++;
659
660	## Section header duplication and missing section content
661	if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
662	    $x->{line} =~ /^\S[^:]/ &&
663	    defined $self_test_info[$index] &&
664	    $self_test_info[$index]->{line} =~ /^([A-Z]):\s*\S/) {
665	    my $has_S = 0;
666	    my $has_F = 0;
667	    my $has_ML = 0;
668	    my $status = "";
669	    if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
670		print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n");
671	    } else {
672		push(@section_headers, $x->{line});
673	    }
674	    my $nextline = $index;
675	    while (defined $self_test_info[$nextline] &&
676		   $self_test_info[$nextline]->{line} =~ /^([A-Z]):\s*(\S.*)/) {
677		my $type = $1;
678		my $value = $2;
679		if ($type eq "S") {
680		    $has_S = 1;
681		    $status = $value;
682		} elsif ($type eq "F" || $type eq "N") {
683		    $has_F = 1;
684		} elsif ($type eq "M" || $type eq "R" || $type eq "L") {
685		    $has_ML = 1;
686		}
687		$nextline++;
688	    }
689	    if (!$has_ML && $status !~ /orphan|obsolete/i) {
690		print("$x->{file}:$x->{linenr}: warning: section without email address\t$x->{line}\n");
691	    }
692	    if (!$has_S) {
693		print("$x->{file}:$x->{linenr}: warning: section without status \t$x->{line}\n");
694	    }
695	    if (!$has_F) {
696		print("$x->{file}:$x->{linenr}: warning: section without file pattern\t$x->{line}\n");
697	    }
698	}
699
700	next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);
701
702	my $type = $1;
703	my $value = $2;
704
705	## Filename pattern matching
706	if (($type eq "F" || $type eq "X") &&
707	    ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
708	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
709	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
710	    $value =~ s/\?/\./g;         ##Convert ? to .
711	    ##if pattern is a directory and it lacks a trailing slash, add one
712	    if ((-d $value)) {
713		$value =~ s@([^/])$@$1/@;
714	    }
715	    if (!grep(m@^$value@, @lsfiles)) {
716		print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n");
717	    }
718
719	## Link reachability
720	} elsif (($type eq "W" || $type eq "Q" || $type eq "B") &&
721		 $value =~ /^https?:/ &&
722		 ($self_test eq "" || $self_test =~ /\blinks\b/)) {
723	    next if (grep(m@^\Q$value\E$@, @good_links));
724	    my $isbad = 0;
725	    if (grep(m@^\Q$value\E$@, @bad_links)) {
726	        $isbad = 1;
727	    } else {
728		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`;
729		if ($? == 0) {
730		    push(@good_links, $value);
731		} else {
732		    push(@bad_links, $value);
733		    $isbad = 1;
734		}
735	    }
736	    if ($isbad) {
737	        print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
738	    }
739
740	## SCM reachability
741	} elsif ($type eq "T" &&
742		 ($self_test eq "" || $self_test =~ /\bscm\b/)) {
743	    next if (grep(m@^\Q$value\E$@, @good_links));
744	    my $isbad = 0;
745	    if (grep(m@^\Q$value\E$@, @bad_links)) {
746	        $isbad = 1;
747            } elsif ($value !~ /^(?:git|quilt|hg)\s+\S/) {
748		print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n");
749	    } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {
750		my $url = $1;
751		my $branch = "";
752		$branch = $3 if $3;
753		my $output = `git ls-remote --exit-code -h "$url" $branch > /dev/null 2>&1`;
754		if ($? == 0) {
755		    push(@good_links, $value);
756		} else {
757		    push(@bad_links, $value);
758		    $isbad = 1;
759		}
760	    } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
761		my $url = $1;
762		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
763		if ($? == 0) {
764		    push(@good_links, $value);
765		} else {
766		    push(@bad_links, $value);
767		    $isbad = 1;
768		}
769	    }
770	    if ($isbad) {
771		print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
772	    }
773	}
774    }
775}
776
777sub ignore_email_address {
778    my ($address) = @_;
779
780    foreach my $ignore (@ignore_emails) {
781	return 1 if ($ignore eq $address);
782    }
783
784    return 0;
785}
786
787sub range_is_maintained {
788    my ($start, $end) = @_;
789
790    for (my $i = $start; $i < $end; $i++) {
791	my $line = $typevalue[$i];
792	if ($line =~ m/^([A-Z]):\s*(.*)/) {
793	    my $type = $1;
794	    my $value = $2;
795	    if ($type eq 'S') {
796		if ($value =~ /(maintain|support)/i) {
797		    return 1;
798		}
799	    }
800	}
801    }
802    return 0;
803}
804
805sub range_has_maintainer {
806    my ($start, $end) = @_;
807
808    for (my $i = $start; $i < $end; $i++) {
809	my $line = $typevalue[$i];
810	if ($line =~ m/^([A-Z]):\s*(.*)/) {
811	    my $type = $1;
812	    my $value = $2;
813	    if ($type eq 'M') {
814		return 1;
815	    }
816	}
817    }
818    return 0;
819}
820
821sub get_maintainers {
822    %email_hash_name = ();
823    %email_hash_address = ();
824    %commit_author_hash = ();
825    %commit_signer_hash = ();
826    @email_to = ();
827    %hash_list_to = ();
828    @list_to = ();
829    @scm = ();
830    @web = ();
831    @subsystem = ();
832    @status = ();
833    %deduplicate_name_hash = ();
834    %deduplicate_address_hash = ();
835    if ($email_git_all_signature_types) {
836	$signature_pattern = "(.+?)[Bb][Yy]:";
837    } else {
838	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
839    }
840
841    # Find responsible parties
842
843    my %exact_pattern_match_hash = ();
844
845    foreach my $file (@files) {
846
847	my %hash;
848	my $tvi = find_first_section();
849	while ($tvi < @typevalue) {
850	    my $start = find_starting_index($tvi);
851	    my $end = find_ending_index($tvi);
852	    my $exclude = 0;
853	    my $i;
854
855	    #Do not match excluded file patterns
856
857	    for ($i = $start; $i < $end; $i++) {
858		my $line = $typevalue[$i];
859		if ($line =~ m/^([A-Z]):\s*(.*)/) {
860		    my $type = $1;
861		    my $value = $2;
862		    if ($type eq 'X') {
863			if (file_match_pattern($file, $value)) {
864			    $exclude = 1;
865			    last;
866			}
867		    }
868		}
869	    }
870
871	    if (!$exclude) {
872		for ($i = $start; $i < $end; $i++) {
873		    my $line = $typevalue[$i];
874		    if ($line =~ m/^([A-Z]):\s*(.*)/) {
875			my $type = $1;
876			my $value = $2;
877			if ($type eq 'F') {
878			    if (file_match_pattern($file, $value)) {
879				my $value_pd = ($value =~ tr@/@@);
880				my $file_pd = ($file  =~ tr@/@@);
881				$value_pd++ if (substr($value,-1,1) ne "/");
882				$value_pd = -1 if ($value =~ /^\.\*/);
883				if ($value_pd >= $file_pd &&
884				    range_is_maintained($start, $end) &&
885				    range_has_maintainer($start, $end)) {
886				    $exact_pattern_match_hash{$file} = 1;
887				}
888				if ($pattern_depth == 0 ||
889				    (($file_pd - $value_pd) < $pattern_depth)) {
890				    $hash{$tvi} = $value_pd;
891				}
892			    }
893			} elsif ($type eq 'N') {
894			    if ($file =~ m/$value/x) {
895				$hash{$tvi} = 0;
896			    }
897			}
898		    }
899		}
900	    }
901	    $tvi = $end + 1;
902	}
903
904	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
905	    add_categories($line);
906	    if ($sections) {
907		my $i;
908		my $start = find_starting_index($line);
909		my $end = find_ending_index($line);
910		for ($i = $start; $i < $end; $i++) {
911		    my $line = $typevalue[$i];
912		    if ($line =~ /^[FX]:/) {		##Restore file patterns
913			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
914			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
915			$line =~ s/\\\./\./g;       	##Convert \. to .
916			$line =~ s/\.\*/\*/g;       	##Convert .* to *
917		    }
918		    my $count = $line =~ s/^([A-Z]):/$1:\t/g;
919		    if ($letters eq "" || (!$count || $letters =~ /$1/i)) {
920			print("$line\n");
921		    }
922		}
923		print("\n");
924	    }
925	}
926    }
927
928    if ($keywords) {
929	@keyword_tvi = sort_and_uniq(@keyword_tvi);
930	foreach my $line (@keyword_tvi) {
931	    add_categories($line);
932	}
933    }
934
935    foreach my $fix (@fixes) {
936	vcs_add_commit_signers($fix, "blamed_fixes");
937    }
938
939    foreach my $email (@email_to, @list_to) {
940	$email->[0] = deduplicate_email($email->[0]);
941    }
942
943    foreach my $file (@files) {
944	if ($email &&
945	    ($email_git || ($email_git_fallback &&
946			    !$exact_pattern_match_hash{$file}))) {
947	    vcs_file_signoffs($file);
948	}
949	if ($email && $email_git_blame) {
950	    vcs_file_blame($file);
951	}
952    }
953
954    if ($email) {
955	foreach my $chief (@penguin_chief) {
956	    if ($chief =~ m/^(.*):(.*)/) {
957		my $email_address;
958
959		$email_address = format_email($1, $2, $email_usename);
960		if ($email_git_penguin_chiefs) {
961		    push(@email_to, [$email_address, 'chief penguin']);
962		} else {
963		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
964		}
965	    }
966	}
967
968	foreach my $email (@file_emails) {
969	    my ($name, $address) = parse_email($email);
970
971	    my $tmp_email = format_email($name, $address, $email_usename);
972	    push_email_address($tmp_email, '');
973	    add_role($tmp_email, 'in file');
974	}
975    }
976
977    my @to = ();
978    if ($email || $email_list) {
979	if ($email) {
980	    @to = (@to, @email_to);
981	}
982	if ($email_list) {
983	    @to = (@to, @list_to);
984	}
985    }
986
987    if ($interactive) {
988	@to = interactive_get_maintainers(\@to);
989    }
990
991    return @to;
992}
993
994sub file_match_pattern {
995    my ($file, $pattern) = @_;
996    if (substr($pattern, -1) eq "/") {
997	if ($file =~ m@^$pattern@) {
998	    return 1;
999	}
1000    } else {
1001	if ($file =~ m@^$pattern@) {
1002	    my $s1 = ($file =~ tr@/@@);
1003	    my $s2 = ($pattern =~ tr@/@@);
1004	    if ($s1 == $s2) {
1005		return 1;
1006	    }
1007	}
1008    }
1009    return 0;
1010}
1011
1012sub usage {
1013    print <<EOT;
1014usage: $P [options] patchfile
1015       $P [options] -f file|directory
1016version: $V
1017
1018MAINTAINER field selection options:
1019  --email => print email address(es) if any
1020    --git => include recent git \*-by: signers
1021    --git-all-signature-types => include signers regardless of signature type
1022        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
1023    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
1024    --git-chief-penguins => include ${penguin_chiefs}
1025    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
1026    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
1027    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
1028    --git-blame => use git blame to find modified commits for patch or file
1029    --git-blame-signatures => when used with --git-blame, also include all commit signers
1030    --git-since => git history to use (default: $email_git_since)
1031    --hg-since => hg history to use (default: $email_hg_since)
1032    --interactive => display a menu (mostly useful if used with the --git option)
1033    --m => include maintainer(s) if any
1034    --r => include reviewer(s) if any
1035    --n => include name 'Full Name <addr\@domain.tld>'
1036    --l => include list(s) if any
1037    --moderated => include moderated lists(s) if any (default: true)
1038    --s => include subscriber only list(s) if any (default: false)
1039    --remove-duplicates => minimize duplicate email names/addresses
1040    --roles => show roles (status:subsystem, git-signer, list, etc...)
1041    --rolestats => show roles and statistics (commits/total_commits, %)
1042    --file-emails => add email addresses found in -f file (default: 0 (off))
1043    --fixes => for patches, add signatures of commits with 'Fixes: <commit>' (default: 1 (on))
1044  --scm => print SCM tree(s) if any
1045  --status => print status if any
1046  --subsystem => print subsystem name if any
1047  --web => print website(s) if any
1048
1049Output type options:
1050  --separator [, ] => separator for multiple entries on 1 line
1051    using --separator also sets --nomultiline if --separator is not [, ]
1052  --multiline => print 1 entry per line
1053
1054Other options:
1055  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
1056  --keywords => scan patch for keywords (default: $keywords)
1057  --sections => print all of the subsystem sections with pattern matches
1058  --letters => print all matching 'letter' types from all matching sections
1059  --mailmap => use .mailmap file (default: $email_use_mailmap)
1060  --no-tree => run without a kernel tree
1061  --self-test => show potential issues with MAINTAINERS file content
1062  --version => show version
1063  --help => show this help information
1064
1065Default options:
1066  [--email --tree --nogit --git-fallback --m --r --n --l --multiline
1067   --pattern-depth=0 --remove-duplicates --rolestats]
1068
1069Notes:
1070  Using "-f directory" may give unexpected results:
1071      Used with "--git", git signators for _all_ files in and below
1072          directory are examined as git recurses directories.
1073          Any specified X: (exclude) pattern matches are _not_ ignored.
1074      Used with "--nogit", directory is used as a pattern match,
1075          no individual file within the directory or subdirectory
1076          is matched.
1077      Used with "--git-blame", does not iterate all files in directory
1078  Using "--git-blame" is slow and may add old committers and authors
1079      that are no longer active maintainers to the output.
1080  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
1081      other automated tools that expect only ["name"] <email address>
1082      may not work because of additional output after <email address>.
1083  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
1084      not the percentage of the entire file authored.  # of commits is
1085      not a good measure of amount of code authored.  1 major commit may
1086      contain a thousand lines, 5 trivial commits may modify a single line.
1087  If git is not installed, but mercurial (hg) is installed and an .hg
1088      repository exists, the following options apply to mercurial:
1089          --git,
1090          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
1091          --git-blame
1092      Use --hg-since not --git-since to control date selection
1093  File ".get_maintainer.conf", if it exists in the linux kernel source root
1094      directory, can change whatever get_maintainer defaults are desired.
1095      Entries in this file can be any command line argument.
1096      This file is prepended to any additional command line arguments.
1097      Multiple lines and # comments are allowed.
1098  Most options have both positive and negative forms.
1099      The negative forms for --<foo> are --no<foo> and --no-<foo>.
1100
1101EOT
1102}
1103
1104sub top_of_kernel_tree {
1105    my ($lk_path) = @_;
1106
1107    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
1108	$lk_path .= "/";
1109    }
1110    if (   (-f "${lk_path}COPYING")
1111	&& (-f "${lk_path}CREDITS")
1112	&& (-f "${lk_path}Kbuild")
1113	&& (-e "${lk_path}MAINTAINERS")
1114	&& (-f "${lk_path}Makefile")
1115	&& (-f "${lk_path}README")
1116	&& (-d "${lk_path}Documentation")
1117	&& (-d "${lk_path}arch")
1118	&& (-d "${lk_path}include")
1119	&& (-d "${lk_path}drivers")
1120	&& (-d "${lk_path}fs")
1121	&& (-d "${lk_path}init")
1122	&& (-d "${lk_path}ipc")
1123	&& (-d "${lk_path}kernel")
1124	&& (-d "${lk_path}lib")
1125	&& (-d "${lk_path}scripts")) {
1126	return 1;
1127    }
1128    return 0;
1129}
1130
1131sub parse_email {
1132    my ($formatted_email) = @_;
1133
1134    my $name = "";
1135    my $address = "";
1136
1137    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
1138	$name = $1;
1139	$address = $2;
1140    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
1141	$address = $1;
1142    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
1143	$address = $1;
1144    }
1145
1146    $name =~ s/^\s+|\s+$//g;
1147    $name =~ s/^\"|\"$//g;
1148    $address =~ s/^\s+|\s+$//g;
1149
1150    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
1151	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1152	$name = "\"$name\"";
1153    }
1154
1155    return ($name, $address);
1156}
1157
1158sub format_email {
1159    my ($name, $address, $usename) = @_;
1160
1161    my $formatted_email;
1162
1163    $name =~ s/^\s+|\s+$//g;
1164    $name =~ s/^\"|\"$//g;
1165    $address =~ s/^\s+|\s+$//g;
1166
1167    if ($name =~ /[^\w \-]/i) {          ##has "must quote" chars
1168	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1169	$name = "\"$name\"";
1170    }
1171
1172    if ($usename) {
1173	if ("$name" eq "") {
1174	    $formatted_email = "$address";
1175	} else {
1176	    $formatted_email = "$name <$address>";
1177	}
1178    } else {
1179	$formatted_email = $address;
1180    }
1181
1182    return $formatted_email;
1183}
1184
1185sub find_first_section {
1186    my $index = 0;
1187
1188    while ($index < @typevalue) {
1189	my $tv = $typevalue[$index];
1190	if (($tv =~ m/^([A-Z]):\s*(.*)/)) {
1191	    last;
1192	}
1193	$index++;
1194    }
1195
1196    return $index;
1197}
1198
1199sub find_starting_index {
1200    my ($index) = @_;
1201
1202    while ($index > 0) {
1203	my $tv = $typevalue[$index];
1204	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1205	    last;
1206	}
1207	$index--;
1208    }
1209
1210    return $index;
1211}
1212
1213sub find_ending_index {
1214    my ($index) = @_;
1215
1216    while ($index < @typevalue) {
1217	my $tv = $typevalue[$index];
1218	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1219	    last;
1220	}
1221	$index++;
1222    }
1223
1224    return $index;
1225}
1226
1227sub get_subsystem_name {
1228    my ($index) = @_;
1229
1230    my $start = find_starting_index($index);
1231
1232    my $subsystem = $typevalue[$start];
1233    if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
1234	$subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
1235	$subsystem =~ s/\s*$//;
1236	$subsystem = $subsystem . "...";
1237    }
1238    return $subsystem;
1239}
1240
1241sub get_maintainer_role {
1242    my ($index) = @_;
1243
1244    my $i;
1245    my $start = find_starting_index($index);
1246    my $end = find_ending_index($index);
1247
1248    my $role = "unknown";
1249    my $subsystem = get_subsystem_name($index);
1250
1251    for ($i = $start + 1; $i < $end; $i++) {
1252	my $tv = $typevalue[$i];
1253	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1254	    my $ptype = $1;
1255	    my $pvalue = $2;
1256	    if ($ptype eq "S") {
1257		$role = $pvalue;
1258	    }
1259	}
1260    }
1261
1262    $role = lc($role);
1263    if      ($role eq "supported") {
1264	$role = "supporter";
1265    } elsif ($role eq "maintained") {
1266	$role = "maintainer";
1267    } elsif ($role eq "odd fixes") {
1268	$role = "odd fixer";
1269    } elsif ($role eq "orphan") {
1270	$role = "orphan minder";
1271    } elsif ($role eq "obsolete") {
1272	$role = "obsolete minder";
1273    } elsif ($role eq "buried alive in reporters") {
1274	$role = "chief penguin";
1275    }
1276
1277    return $role . ":" . $subsystem;
1278}
1279
1280sub get_list_role {
1281    my ($index) = @_;
1282
1283    my $subsystem = get_subsystem_name($index);
1284
1285    if ($subsystem eq "THE REST") {
1286	$subsystem = "";
1287    }
1288
1289    return $subsystem;
1290}
1291
1292sub add_categories {
1293    my ($index) = @_;
1294
1295    my $i;
1296    my $start = find_starting_index($index);
1297    my $end = find_ending_index($index);
1298
1299    push(@subsystem, $typevalue[$start]);
1300
1301    for ($i = $start + 1; $i < $end; $i++) {
1302	my $tv = $typevalue[$i];
1303	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1304	    my $ptype = $1;
1305	    my $pvalue = $2;
1306	    if ($ptype eq "L") {
1307		my $list_address = $pvalue;
1308		my $list_additional = "";
1309		my $list_role = get_list_role($i);
1310
1311		if ($list_role ne "") {
1312		    $list_role = ":" . $list_role;
1313		}
1314		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1315		    $list_address = $1;
1316		    $list_additional = $2;
1317		}
1318		if ($list_additional =~ m/subscribers-only/) {
1319		    if ($email_subscriber_list) {
1320			if (!$hash_list_to{lc($list_address)}) {
1321			    $hash_list_to{lc($list_address)} = 1;
1322			    push(@list_to, [$list_address,
1323					    "subscriber list${list_role}"]);
1324			}
1325		    }
1326		} else {
1327		    if ($email_list) {
1328			if (!$hash_list_to{lc($list_address)}) {
1329			    if ($list_additional =~ m/moderated/) {
1330				if ($email_moderated_list) {
1331				    $hash_list_to{lc($list_address)} = 1;
1332				    push(@list_to, [$list_address,
1333						    "moderated list${list_role}"]);
1334				}
1335			    } else {
1336				$hash_list_to{lc($list_address)} = 1;
1337				push(@list_to, [$list_address,
1338						"open list${list_role}"]);
1339			    }
1340			}
1341		    }
1342		}
1343	    } elsif ($ptype eq "M") {
1344		my ($name, $address) = parse_email($pvalue);
1345		if ($name eq "") {
1346		    if ($i > 0) {
1347			my $tv = $typevalue[$i - 1];
1348			if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1349			    if ($1 eq "P") {
1350				$name = $2;
1351				$pvalue = format_email($name, $address, $email_usename);
1352			    }
1353			}
1354		    }
1355		}
1356		if ($email_maintainer) {
1357		    my $role = get_maintainer_role($i);
1358		    push_email_addresses($pvalue, $role);
1359		}
1360	    } elsif ($ptype eq "R") {
1361		my ($name, $address) = parse_email($pvalue);
1362		if ($name eq "") {
1363		    if ($i > 0) {
1364			my $tv = $typevalue[$i - 1];
1365			if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1366			    if ($1 eq "P") {
1367				$name = $2;
1368				$pvalue = format_email($name, $address, $email_usename);
1369			    }
1370			}
1371		    }
1372		}
1373		if ($email_reviewer) {
1374		    my $subsystem = get_subsystem_name($i);
1375		    push_email_addresses($pvalue, "reviewer:$subsystem");
1376		}
1377	    } elsif ($ptype eq "T") {
1378		push(@scm, $pvalue);
1379	    } elsif ($ptype eq "W") {
1380		push(@web, $pvalue);
1381	    } elsif ($ptype eq "S") {
1382		push(@status, $pvalue);
1383	    }
1384	}
1385    }
1386}
1387
1388sub email_inuse {
1389    my ($name, $address) = @_;
1390
1391    return 1 if (($name eq "") && ($address eq ""));
1392    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1393    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1394
1395    return 0;
1396}
1397
1398sub push_email_address {
1399    my ($line, $role) = @_;
1400
1401    my ($name, $address) = parse_email($line);
1402
1403    if ($address eq "") {
1404	return 0;
1405    }
1406
1407    if (!$email_remove_duplicates) {
1408	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1409    } elsif (!email_inuse($name, $address)) {
1410	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1411	$email_hash_name{lc($name)}++ if ($name ne "");
1412	$email_hash_address{lc($address)}++;
1413    }
1414
1415    return 1;
1416}
1417
1418sub push_email_addresses {
1419    my ($address, $role) = @_;
1420
1421    my @address_list = ();
1422
1423    if (rfc822_valid($address)) {
1424	push_email_address($address, $role);
1425    } elsif (@address_list = rfc822_validlist($address)) {
1426	my $array_count = shift(@address_list);
1427	while (my $entry = shift(@address_list)) {
1428	    push_email_address($entry, $role);
1429	}
1430    } else {
1431	if (!push_email_address($address, $role)) {
1432	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1433	}
1434    }
1435}
1436
1437sub add_role {
1438    my ($line, $role) = @_;
1439
1440    my ($name, $address) = parse_email($line);
1441    my $email = format_email($name, $address, $email_usename);
1442
1443    foreach my $entry (@email_to) {
1444	if ($email_remove_duplicates) {
1445	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1446	    if (($name eq $entry_name || $address eq $entry_address)
1447		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1448	    ) {
1449		if ($entry->[1] eq "") {
1450		    $entry->[1] = "$role";
1451		} else {
1452		    $entry->[1] = "$entry->[1],$role";
1453		}
1454	    }
1455	} else {
1456	    if ($email eq $entry->[0]
1457		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1458	    ) {
1459		if ($entry->[1] eq "") {
1460		    $entry->[1] = "$role";
1461		} else {
1462		    $entry->[1] = "$entry->[1],$role";
1463		}
1464	    }
1465	}
1466    }
1467}
1468
1469sub which {
1470    my ($bin) = @_;
1471
1472    foreach my $path (split(/:/, $ENV{PATH})) {
1473	if (-e "$path/$bin") {
1474	    return "$path/$bin";
1475	}
1476    }
1477
1478    return "";
1479}
1480
1481sub which_conf {
1482    my ($conf) = @_;
1483
1484    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1485	if (-e "$path/$conf") {
1486	    return "$path/$conf";
1487	}
1488    }
1489
1490    return "";
1491}
1492
1493sub mailmap_email {
1494    my ($line) = @_;
1495
1496    my ($name, $address) = parse_email($line);
1497    my $email = format_email($name, $address, 1);
1498    my $real_name = $name;
1499    my $real_address = $address;
1500
1501    if (exists $mailmap->{names}->{$email} ||
1502	exists $mailmap->{addresses}->{$email}) {
1503	if (exists $mailmap->{names}->{$email}) {
1504	    $real_name = $mailmap->{names}->{$email};
1505	}
1506	if (exists $mailmap->{addresses}->{$email}) {
1507	    $real_address = $mailmap->{addresses}->{$email};
1508	}
1509    } else {
1510	if (exists $mailmap->{names}->{$address}) {
1511	    $real_name = $mailmap->{names}->{$address};
1512	}
1513	if (exists $mailmap->{addresses}->{$address}) {
1514	    $real_address = $mailmap->{addresses}->{$address};
1515	}
1516    }
1517    return format_email($real_name, $real_address, 1);
1518}
1519
1520sub mailmap {
1521    my (@addresses) = @_;
1522
1523    my @mapped_emails = ();
1524    foreach my $line (@addresses) {
1525	push(@mapped_emails, mailmap_email($line));
1526    }
1527    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1528    return @mapped_emails;
1529}
1530
1531sub merge_by_realname {
1532    my %address_map;
1533    my (@emails) = @_;
1534
1535    foreach my $email (@emails) {
1536	my ($name, $address) = parse_email($email);
1537	if (exists $address_map{$name}) {
1538	    $address = $address_map{$name};
1539	    $email = format_email($name, $address, 1);
1540	} else {
1541	    $address_map{$name} = $address;
1542	}
1543    }
1544}
1545
1546sub git_execute_cmd {
1547    my ($cmd) = @_;
1548    my @lines = ();
1549
1550    my $output = `$cmd`;
1551    $output =~ s/^\s*//gm;
1552    @lines = split("\n", $output);
1553
1554    return @lines;
1555}
1556
1557sub hg_execute_cmd {
1558    my ($cmd) = @_;
1559    my @lines = ();
1560
1561    my $output = `$cmd`;
1562    @lines = split("\n", $output);
1563
1564    return @lines;
1565}
1566
1567sub extract_formatted_signatures {
1568    my (@signature_lines) = @_;
1569
1570    my @type = @signature_lines;
1571
1572    s/\s*(.*):.*/$1/ for (@type);
1573
1574    # cut -f2- -d":"
1575    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1576
1577## Reformat email addresses (with names) to avoid badly written signatures
1578
1579    foreach my $signer (@signature_lines) {
1580	$signer = deduplicate_email($signer);
1581    }
1582
1583    return (\@type, \@signature_lines);
1584}
1585
1586sub vcs_find_signers {
1587    my ($cmd, $file) = @_;
1588    my $commits;
1589    my @lines = ();
1590    my @signatures = ();
1591    my @authors = ();
1592    my @stats = ();
1593
1594    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1595
1596    my $pattern = $VCS_cmds{"commit_pattern"};
1597    my $author_pattern = $VCS_cmds{"author_pattern"};
1598    my $stat_pattern = $VCS_cmds{"stat_pattern"};
1599
1600    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern
1601
1602    $commits = grep(/$pattern/, @lines);	# of commits
1603
1604    @authors = grep(/$author_pattern/, @lines);
1605    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1606    @stats = grep(/$stat_pattern/, @lines);
1607
1608#    print("stats: <@stats>\n");
1609
1610    return (0, \@signatures, \@authors, \@stats) if !@signatures;
1611
1612    save_commits_by_author(@lines) if ($interactive);
1613    save_commits_by_signer(@lines) if ($interactive);
1614
1615    if (!$email_git_penguin_chiefs) {
1616	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1617    }
1618
1619    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
1620    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1621
1622    return ($commits, $signers_ref, $authors_ref, \@stats);
1623}
1624
1625sub vcs_find_author {
1626    my ($cmd) = @_;
1627    my @lines = ();
1628
1629    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1630
1631    if (!$email_git_penguin_chiefs) {
1632	@lines = grep(!/${penguin_chiefs}/i, @lines);
1633    }
1634
1635    return @lines if !@lines;
1636
1637    my @authors = ();
1638    foreach my $line (@lines) {
1639	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1640	    my $author = $1;
1641	    my ($name, $address) = parse_email($author);
1642	    $author = format_email($name, $address, 1);
1643	    push(@authors, $author);
1644	}
1645    }
1646
1647    save_commits_by_author(@lines) if ($interactive);
1648    save_commits_by_signer(@lines) if ($interactive);
1649
1650    return @authors;
1651}
1652
1653sub vcs_save_commits {
1654    my ($cmd) = @_;
1655    my @lines = ();
1656    my @commits = ();
1657
1658    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1659
1660    foreach my $line (@lines) {
1661	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1662	    push(@commits, $1);
1663	}
1664    }
1665
1666    return @commits;
1667}
1668
1669sub vcs_blame {
1670    my ($file) = @_;
1671    my $cmd;
1672    my @commits = ();
1673
1674    return @commits if (!(-f $file));
1675
1676    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1677	my @all_commits = ();
1678
1679	$cmd = $VCS_cmds{"blame_file_cmd"};
1680	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1681	@all_commits = vcs_save_commits($cmd);
1682
1683	foreach my $file_range_diff (@range) {
1684	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1685	    my $diff_file = $1;
1686	    my $diff_start = $2;
1687	    my $diff_length = $3;
1688	    next if ("$file" ne "$diff_file");
1689	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1690		push(@commits, $all_commits[$i]);
1691	    }
1692	}
1693    } elsif (@range) {
1694	foreach my $file_range_diff (@range) {
1695	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1696	    my $diff_file = $1;
1697	    my $diff_start = $2;
1698	    my $diff_length = $3;
1699	    next if ("$file" ne "$diff_file");
1700	    $cmd = $VCS_cmds{"blame_range_cmd"};
1701	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1702	    push(@commits, vcs_save_commits($cmd));
1703	}
1704    } else {
1705	$cmd = $VCS_cmds{"blame_file_cmd"};
1706	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1707	@commits = vcs_save_commits($cmd);
1708    }
1709
1710    foreach my $commit (@commits) {
1711	$commit =~ s/^\^//g;
1712    }
1713
1714    return @commits;
1715}
1716
1717my $printed_novcs = 0;
1718sub vcs_exists {
1719    %VCS_cmds = %VCS_cmds_git;
1720    return 1 if eval $VCS_cmds{"available"};
1721    %VCS_cmds = %VCS_cmds_hg;
1722    return 2 if eval $VCS_cmds{"available"};
1723    %VCS_cmds = ();
1724    if (!$printed_novcs) {
1725	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1726	warn("Using a git repository produces better results.\n");
1727	warn("Try Linus Torvalds' latest git repository using:\n");
1728	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
1729	$printed_novcs = 1;
1730    }
1731    return 0;
1732}
1733
1734sub vcs_is_git {
1735    vcs_exists();
1736    return $vcs_used == 1;
1737}
1738
1739sub vcs_is_hg {
1740    return $vcs_used == 2;
1741}
1742
1743sub vcs_add_commit_signers {
1744    return if (!vcs_exists());
1745
1746    my ($commit, $desc) = @_;
1747    my $commit_count = 0;
1748    my $commit_authors_ref;
1749    my $commit_signers_ref;
1750    my $stats_ref;
1751    my @commit_authors = ();
1752    my @commit_signers = ();
1753    my $cmd;
1754
1755    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
1756    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1757
1758    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, "");
1759    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
1760    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
1761
1762    foreach my $signer (@commit_signers) {
1763	$signer = deduplicate_email($signer);
1764    }
1765
1766    vcs_assign($desc, 1, @commit_signers);
1767}
1768
1769sub interactive_get_maintainers {
1770    my ($list_ref) = @_;
1771    my @list = @$list_ref;
1772
1773    vcs_exists();
1774
1775    my %selected;
1776    my %authored;
1777    my %signed;
1778    my $count = 0;
1779    my $maintained = 0;
1780    foreach my $entry (@list) {
1781	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1782	$selected{$count} = 1;
1783	$authored{$count} = 0;
1784	$signed{$count} = 0;
1785	$count++;
1786    }
1787
1788    #menu loop
1789    my $done = 0;
1790    my $print_options = 0;
1791    my $redraw = 1;
1792    while (!$done) {
1793	$count = 0;
1794	if ($redraw) {
1795	    printf STDERR "\n%1s %2s %-65s",
1796			  "*", "#", "email/list and role:stats";
1797	    if ($email_git ||
1798		($email_git_fallback && !$maintained) ||
1799		$email_git_blame) {
1800		print STDERR "auth sign";
1801	    }
1802	    print STDERR "\n";
1803	    foreach my $entry (@list) {
1804		my $email = $entry->[0];
1805		my $role = $entry->[1];
1806		my $sel = "";
1807		$sel = "*" if ($selected{$count});
1808		my $commit_author = $commit_author_hash{$email};
1809		my $commit_signer = $commit_signer_hash{$email};
1810		my $authored = 0;
1811		my $signed = 0;
1812		$authored++ for (@{$commit_author});
1813		$signed++ for (@{$commit_signer});
1814		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1815		printf STDERR "%4d %4d", $authored, $signed
1816		    if ($authored > 0 || $signed > 0);
1817		printf STDERR "\n     %s\n", $role;
1818		if ($authored{$count}) {
1819		    my $commit_author = $commit_author_hash{$email};
1820		    foreach my $ref (@{$commit_author}) {
1821			print STDERR "     Author: @{$ref}[1]\n";
1822		    }
1823		}
1824		if ($signed{$count}) {
1825		    my $commit_signer = $commit_signer_hash{$email};
1826		    foreach my $ref (@{$commit_signer}) {
1827			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1828		    }
1829		}
1830
1831		$count++;
1832	    }
1833	}
1834	my $date_ref = \$email_git_since;
1835	$date_ref = \$email_hg_since if (vcs_is_hg());
1836	if ($print_options) {
1837	    $print_options = 0;
1838	    if (vcs_exists()) {
1839		print STDERR <<EOT
1840
1841Version Control options:
1842g  use git history      [$email_git]
1843gf use git-fallback     [$email_git_fallback]
1844b  use git blame        [$email_git_blame]
1845bs use blame signatures [$email_git_blame_signatures]
1846c# minimum commits      [$email_git_min_signatures]
1847%# min percent          [$email_git_min_percent]
1848d# history to use       [$$date_ref]
1849x# max maintainers      [$email_git_max_maintainers]
1850t  all signature types  [$email_git_all_signature_types]
1851m  use .mailmap         [$email_use_mailmap]
1852EOT
1853	    }
1854	    print STDERR <<EOT
1855
1856Additional options:
18570  toggle all
1858tm toggle maintainers
1859tg toggle git entries
1860tl toggle open list entries
1861ts toggle subscriber list entries
1862f  emails in file       [$file_emails]
1863k  keywords in file     [$keywords]
1864r  remove duplicates    [$email_remove_duplicates]
1865p# pattern match depth  [$pattern_depth]
1866EOT
1867	}
1868	print STDERR
1869"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1870
1871	my $input = <STDIN>;
1872	chomp($input);
1873
1874	$redraw = 1;
1875	my $rerun = 0;
1876	my @wish = split(/[, ]+/, $input);
1877	foreach my $nr (@wish) {
1878	    $nr = lc($nr);
1879	    my $sel = substr($nr, 0, 1);
1880	    my $str = substr($nr, 1);
1881	    my $val = 0;
1882	    $val = $1 if $str =~ /^(\d+)$/;
1883
1884	    if ($sel eq "y") {
1885		$interactive = 0;
1886		$done = 1;
1887		$output_rolestats = 0;
1888		$output_roles = 0;
1889		last;
1890	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1891		$selected{$nr - 1} = !$selected{$nr - 1};
1892	    } elsif ($sel eq "*" || $sel eq '^') {
1893		my $toggle = 0;
1894		$toggle = 1 if ($sel eq '*');
1895		for (my $i = 0; $i < $count; $i++) {
1896		    $selected{$i} = $toggle;
1897		}
1898	    } elsif ($sel eq "0") {
1899		for (my $i = 0; $i < $count; $i++) {
1900		    $selected{$i} = !$selected{$i};
1901		}
1902	    } elsif ($sel eq "t") {
1903		if (lc($str) eq "m") {
1904		    for (my $i = 0; $i < $count; $i++) {
1905			$selected{$i} = !$selected{$i}
1906			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1907		    }
1908		} elsif (lc($str) eq "g") {
1909		    for (my $i = 0; $i < $count; $i++) {
1910			$selected{$i} = !$selected{$i}
1911			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1912		    }
1913		} elsif (lc($str) eq "l") {
1914		    for (my $i = 0; $i < $count; $i++) {
1915			$selected{$i} = !$selected{$i}
1916			    if ($list[$i]->[1] =~ /^(open list)/i);
1917		    }
1918		} elsif (lc($str) eq "s") {
1919		    for (my $i = 0; $i < $count; $i++) {
1920			$selected{$i} = !$selected{$i}
1921			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1922		    }
1923		}
1924	    } elsif ($sel eq "a") {
1925		if ($val > 0 && $val <= $count) {
1926		    $authored{$val - 1} = !$authored{$val - 1};
1927		} elsif ($str eq '*' || $str eq '^') {
1928		    my $toggle = 0;
1929		    $toggle = 1 if ($str eq '*');
1930		    for (my $i = 0; $i < $count; $i++) {
1931			$authored{$i} = $toggle;
1932		    }
1933		}
1934	    } elsif ($sel eq "s") {
1935		if ($val > 0 && $val <= $count) {
1936		    $signed{$val - 1} = !$signed{$val - 1};
1937		} elsif ($str eq '*' || $str eq '^') {
1938		    my $toggle = 0;
1939		    $toggle = 1 if ($str eq '*');
1940		    for (my $i = 0; $i < $count; $i++) {
1941			$signed{$i} = $toggle;
1942		    }
1943		}
1944	    } elsif ($sel eq "o") {
1945		$print_options = 1;
1946		$redraw = 1;
1947	    } elsif ($sel eq "g") {
1948		if ($str eq "f") {
1949		    bool_invert(\$email_git_fallback);
1950		} else {
1951		    bool_invert(\$email_git);
1952		}
1953		$rerun = 1;
1954	    } elsif ($sel eq "b") {
1955		if ($str eq "s") {
1956		    bool_invert(\$email_git_blame_signatures);
1957		} else {
1958		    bool_invert(\$email_git_blame);
1959		}
1960		$rerun = 1;
1961	    } elsif ($sel eq "c") {
1962		if ($val > 0) {
1963		    $email_git_min_signatures = $val;
1964		    $rerun = 1;
1965		}
1966	    } elsif ($sel eq "x") {
1967		if ($val > 0) {
1968		    $email_git_max_maintainers = $val;
1969		    $rerun = 1;
1970		}
1971	    } elsif ($sel eq "%") {
1972		if ($str ne "" && $val >= 0) {
1973		    $email_git_min_percent = $val;
1974		    $rerun = 1;
1975		}
1976	    } elsif ($sel eq "d") {
1977		if (vcs_is_git()) {
1978		    $email_git_since = $str;
1979		} elsif (vcs_is_hg()) {
1980		    $email_hg_since = $str;
1981		}
1982		$rerun = 1;
1983	    } elsif ($sel eq "t") {
1984		bool_invert(\$email_git_all_signature_types);
1985		$rerun = 1;
1986	    } elsif ($sel eq "f") {
1987		bool_invert(\$file_emails);
1988		$rerun = 1;
1989	    } elsif ($sel eq "r") {
1990		bool_invert(\$email_remove_duplicates);
1991		$rerun = 1;
1992	    } elsif ($sel eq "m") {
1993		bool_invert(\$email_use_mailmap);
1994		read_mailmap();
1995		$rerun = 1;
1996	    } elsif ($sel eq "k") {
1997		bool_invert(\$keywords);
1998		$rerun = 1;
1999	    } elsif ($sel eq "p") {
2000		if ($str ne "" && $val >= 0) {
2001		    $pattern_depth = $val;
2002		    $rerun = 1;
2003		}
2004	    } elsif ($sel eq "h" || $sel eq "?") {
2005		print STDERR <<EOT
2006
2007Interactive mode allows you to select the various maintainers, submitters,
2008commit signers and mailing lists that could be CC'd on a patch.
2009
2010Any *'d entry is selected.
2011
2012If you have git or hg installed, you can choose to summarize the commit
2013history of files in the patch.  Also, each line of the current file can
2014be matched to its commit author and that commits signers with blame.
2015
2016Various knobs exist to control the length of time for active commit
2017tracking, the maximum number of commit authors and signers to add,
2018and such.
2019
2020Enter selections at the prompt until you are satisfied that the selected
2021maintainers are appropriate.  You may enter multiple selections separated
2022by either commas or spaces.
2023
2024EOT
2025	    } else {
2026		print STDERR "invalid option: '$nr'\n";
2027		$redraw = 0;
2028	    }
2029	}
2030	if ($rerun) {
2031	    print STDERR "git-blame can be very slow, please have patience..."
2032		if ($email_git_blame);
2033	    goto &get_maintainers;
2034	}
2035    }
2036
2037    #drop not selected entries
2038    $count = 0;
2039    my @new_emailto = ();
2040    foreach my $entry (@list) {
2041	if ($selected{$count}) {
2042	    push(@new_emailto, $list[$count]);
2043	}
2044	$count++;
2045    }
2046    return @new_emailto;
2047}
2048
2049sub bool_invert {
2050    my ($bool_ref) = @_;
2051
2052    if ($$bool_ref) {
2053	$$bool_ref = 0;
2054    } else {
2055	$$bool_ref = 1;
2056    }
2057}
2058
2059sub deduplicate_email {
2060    my ($email) = @_;
2061
2062    my $matched = 0;
2063    my ($name, $address) = parse_email($email);
2064    $email = format_email($name, $address, 1);
2065    $email = mailmap_email($email);
2066
2067    return $email if (!$email_remove_duplicates);
2068
2069    ($name, $address) = parse_email($email);
2070
2071    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
2072	$name = $deduplicate_name_hash{lc($name)}->[0];
2073	$address = $deduplicate_name_hash{lc($name)}->[1];
2074	$matched = 1;
2075    } elsif ($deduplicate_address_hash{lc($address)}) {
2076	$name = $deduplicate_address_hash{lc($address)}->[0];
2077	$address = $deduplicate_address_hash{lc($address)}->[1];
2078	$matched = 1;
2079    }
2080    if (!$matched) {
2081	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
2082	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
2083    }
2084    $email = format_email($name, $address, 1);
2085    $email = mailmap_email($email);
2086    return $email;
2087}
2088
2089sub save_commits_by_author {
2090    my (@lines) = @_;
2091
2092    my @authors = ();
2093    my @commits = ();
2094    my @subjects = ();
2095
2096    foreach my $line (@lines) {
2097	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2098	    my $author = $1;
2099	    $author = deduplicate_email($author);
2100	    push(@authors, $author);
2101	}
2102	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2103	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2104    }
2105
2106    for (my $i = 0; $i < @authors; $i++) {
2107	my $exists = 0;
2108	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
2109	    if (@{$ref}[0] eq $commits[$i] &&
2110		@{$ref}[1] eq $subjects[$i]) {
2111		$exists = 1;
2112		last;
2113	    }
2114	}
2115	if (!$exists) {
2116	    push(@{$commit_author_hash{$authors[$i]}},
2117		 [ ($commits[$i], $subjects[$i]) ]);
2118	}
2119    }
2120}
2121
2122sub save_commits_by_signer {
2123    my (@lines) = @_;
2124
2125    my $commit = "";
2126    my $subject = "";
2127
2128    foreach my $line (@lines) {
2129	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2130	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2131	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
2132	    my @signatures = ($line);
2133	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
2134	    my @types = @$types_ref;
2135	    my @signers = @$signers_ref;
2136
2137	    my $type = $types[0];
2138	    my $signer = $signers[0];
2139
2140	    $signer = deduplicate_email($signer);
2141
2142	    my $exists = 0;
2143	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
2144		if (@{$ref}[0] eq $commit &&
2145		    @{$ref}[1] eq $subject &&
2146		    @{$ref}[2] eq $type) {
2147		    $exists = 1;
2148		    last;
2149		}
2150	    }
2151	    if (!$exists) {
2152		push(@{$commit_signer_hash{$signer}},
2153		     [ ($commit, $subject, $type) ]);
2154	    }
2155	}
2156    }
2157}
2158
2159sub vcs_assign {
2160    my ($role, $divisor, @lines) = @_;
2161
2162    my %hash;
2163    my $count = 0;
2164
2165    return if (@lines <= 0);
2166
2167    if ($divisor <= 0) {
2168	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
2169	$divisor = 1;
2170    }
2171
2172    @lines = mailmap(@lines);
2173
2174    return if (@lines <= 0);
2175
2176    @lines = sort(@lines);
2177
2178    # uniq -c
2179    $hash{$_}++ for @lines;
2180
2181    # sort -rn
2182    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
2183	my $sign_offs = $hash{$line};
2184	my $percent = $sign_offs * 100 / $divisor;
2185
2186	$percent = 100 if ($percent > 100);
2187	next if (ignore_email_address($line));
2188	$count++;
2189	last if ($sign_offs < $email_git_min_signatures ||
2190		 $count > $email_git_max_maintainers ||
2191		 $percent < $email_git_min_percent);
2192	push_email_address($line, '');
2193	if ($output_rolestats) {
2194	    my $fmt_percent = sprintf("%.0f", $percent);
2195	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
2196	} else {
2197	    add_role($line, $role);
2198	}
2199    }
2200}
2201
2202sub vcs_file_signoffs {
2203    my ($file) = @_;
2204
2205    my $authors_ref;
2206    my $signers_ref;
2207    my $stats_ref;
2208    my @authors = ();
2209    my @signers = ();
2210    my @stats = ();
2211    my $commits;
2212
2213    $vcs_used = vcs_exists();
2214    return if (!$vcs_used);
2215
2216    my $cmd = $VCS_cmds{"find_signers_cmd"};
2217    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2218
2219    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2220
2221    @signers = @{$signers_ref} if defined $signers_ref;
2222    @authors = @{$authors_ref} if defined $authors_ref;
2223    @stats = @{$stats_ref} if defined $stats_ref;
2224
2225#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
2226
2227    foreach my $signer (@signers) {
2228	$signer = deduplicate_email($signer);
2229    }
2230
2231    vcs_assign("commit_signer", $commits, @signers);
2232    vcs_assign("authored", $commits, @authors);
2233    if ($#authors == $#stats) {
2234	my $stat_pattern = $VCS_cmds{"stat_pattern"};
2235	$stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern
2236
2237	my $added = 0;
2238	my $deleted = 0;
2239	for (my $i = 0; $i <= $#stats; $i++) {
2240	    if ($stats[$i] =~ /$stat_pattern/) {
2241		$added += $1;
2242		$deleted += $2;
2243	    }
2244	}
2245	my @tmp_authors = uniq(@authors);
2246	foreach my $author (@tmp_authors) {
2247	    $author = deduplicate_email($author);
2248	}
2249	@tmp_authors = uniq(@tmp_authors);
2250	my @list_added = ();
2251	my @list_deleted = ();
2252	foreach my $author (@tmp_authors) {
2253	    my $auth_added = 0;
2254	    my $auth_deleted = 0;
2255	    for (my $i = 0; $i <= $#stats; $i++) {
2256		if ($author eq deduplicate_email($authors[$i]) &&
2257		    $stats[$i] =~ /$stat_pattern/) {
2258		    $auth_added += $1;
2259		    $auth_deleted += $2;
2260		}
2261	    }
2262	    for (my $i = 0; $i < $auth_added; $i++) {
2263		push(@list_added, $author);
2264	    }
2265	    for (my $i = 0; $i < $auth_deleted; $i++) {
2266		push(@list_deleted, $author);
2267	    }
2268	}
2269	vcs_assign("added_lines", $added, @list_added);
2270	vcs_assign("removed_lines", $deleted, @list_deleted);
2271    }
2272}
2273
2274sub vcs_file_blame {
2275    my ($file) = @_;
2276
2277    my @signers = ();
2278    my @all_commits = ();
2279    my @commits = ();
2280    my $total_commits;
2281    my $total_lines;
2282
2283    $vcs_used = vcs_exists();
2284    return if (!$vcs_used);
2285
2286    @all_commits = vcs_blame($file);
2287    @commits = uniq(@all_commits);
2288    $total_commits = @commits;
2289    $total_lines = @all_commits;
2290
2291    if ($email_git_blame_signatures) {
2292	if (vcs_is_hg()) {
2293	    my $commit_count;
2294	    my $commit_authors_ref;
2295	    my $commit_signers_ref;
2296	    my $stats_ref;
2297	    my @commit_authors = ();
2298	    my @commit_signers = ();
2299	    my $commit = join(" -r ", @commits);
2300	    my $cmd;
2301
2302	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
2303	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2304
2305	    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2306	    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2307	    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2308
2309	    push(@signers, @commit_signers);
2310	} else {
2311	    foreach my $commit (@commits) {
2312		my $commit_count;
2313		my $commit_authors_ref;
2314		my $commit_signers_ref;
2315		my $stats_ref;
2316		my @commit_authors = ();
2317		my @commit_signers = ();
2318		my $cmd;
2319
2320		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
2321		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2322
2323		($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2324		@commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2325		@commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2326
2327		push(@signers, @commit_signers);
2328	    }
2329	}
2330    }
2331
2332    if ($from_filename) {
2333	if ($output_rolestats) {
2334	    my @blame_signers;
2335	    if (vcs_is_hg()) {{		# Double brace for last exit
2336		my $commit_count;
2337		my @commit_signers = ();
2338		@commits = uniq(@commits);
2339		@commits = sort(@commits);
2340		my $commit = join(" -r ", @commits);
2341		my $cmd;
2342
2343		$cmd = $VCS_cmds{"find_commit_author_cmd"};
2344		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2345
2346		my @lines = ();
2347
2348		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
2349
2350		if (!$email_git_penguin_chiefs) {
2351		    @lines = grep(!/${penguin_chiefs}/i, @lines);
2352		}
2353
2354		last if !@lines;
2355
2356		my @authors = ();
2357		foreach my $line (@lines) {
2358		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2359			my $author = $1;
2360			$author = deduplicate_email($author);
2361			push(@authors, $author);
2362		    }
2363		}
2364
2365		save_commits_by_author(@lines) if ($interactive);
2366		save_commits_by_signer(@lines) if ($interactive);
2367
2368		push(@signers, @authors);
2369	    }}
2370	    else {
2371		foreach my $commit (@commits) {
2372		    my $i;
2373		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2374		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2375		    my @author = vcs_find_author($cmd);
2376		    next if !@author;
2377
2378		    my $formatted_author = deduplicate_email($author[0]);
2379
2380		    my $count = grep(/$commit/, @all_commits);
2381		    for ($i = 0; $i < $count ; $i++) {
2382			push(@blame_signers, $formatted_author);
2383		    }
2384		}
2385	    }
2386	    if (@blame_signers) {
2387		vcs_assign("authored lines", $total_lines, @blame_signers);
2388	    }
2389	}
2390	foreach my $signer (@signers) {
2391	    $signer = deduplicate_email($signer);
2392	}
2393	vcs_assign("commits", $total_commits, @signers);
2394    } else {
2395	foreach my $signer (@signers) {
2396	    $signer = deduplicate_email($signer);
2397	}
2398	vcs_assign("modified commits", $total_commits, @signers);
2399    }
2400}
2401
2402sub vcs_file_exists {
2403    my ($file) = @_;
2404
2405    my $exists;
2406
2407    my $vcs_used = vcs_exists();
2408    return 0 if (!$vcs_used);
2409
2410    my $cmd = $VCS_cmds{"file_exists_cmd"};
2411    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2412    $cmd .= " 2>&1";
2413    $exists = &{$VCS_cmds{"execute_cmd"}}($cmd);
2414
2415    return 0 if ($? != 0);
2416
2417    return $exists;
2418}
2419
2420sub vcs_list_files {
2421    my ($file) = @_;
2422
2423    my @lsfiles = ();
2424
2425    my $vcs_used = vcs_exists();
2426    return 0 if (!$vcs_used);
2427
2428    my $cmd = $VCS_cmds{"list_files_cmd"};
2429    $cmd =~ s/(\$\w+)/$1/eeg;   # interpolate $cmd
2430    @lsfiles = &{$VCS_cmds{"execute_cmd"}}($cmd);
2431
2432    return () if ($? != 0);
2433
2434    return @lsfiles;
2435}
2436
2437sub uniq {
2438    my (@parms) = @_;
2439
2440    my %saw;
2441    @parms = grep(!$saw{$_}++, @parms);
2442    return @parms;
2443}
2444
2445sub sort_and_uniq {
2446    my (@parms) = @_;
2447
2448    my %saw;
2449    @parms = sort @parms;
2450    @parms = grep(!$saw{$_}++, @parms);
2451    return @parms;
2452}
2453
2454sub clean_file_emails {
2455    my (@file_emails) = @_;
2456    my @fmt_emails = ();
2457
2458    foreach my $email (@file_emails) {
2459	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2460	my ($name, $address) = parse_email($email);
2461	if ($name eq '"[,\.]"') {
2462	    $name = "";
2463	}
2464
2465	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
2466	if (@nw > 2) {
2467	    my $first = $nw[@nw - 3];
2468	    my $middle = $nw[@nw - 2];
2469	    my $last = $nw[@nw - 1];
2470
2471	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
2472		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2473		(length($middle) == 1 ||
2474		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2475		$name = "$first $middle $last";
2476	    } else {
2477		$name = "$middle $last";
2478	    }
2479	}
2480
2481	if (substr($name, -1) =~ /[,\.]/) {
2482	    $name = substr($name, 0, length($name) - 1);
2483	} elsif (substr($name, -2) =~ /[,\.]"/) {
2484	    $name = substr($name, 0, length($name) - 2) . '"';
2485	}
2486
2487	if (substr($name, 0, 1) =~ /[,\.]/) {
2488	    $name = substr($name, 1, length($name) - 1);
2489	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
2490	    $name = '"' . substr($name, 2, length($name) - 2);
2491	}
2492
2493	my $fmt_email = format_email($name, $address, $email_usename);
2494	push(@fmt_emails, $fmt_email);
2495    }
2496    return @fmt_emails;
2497}
2498
2499sub merge_email {
2500    my @lines;
2501    my %saw;
2502
2503    for (@_) {
2504	my ($address, $role) = @$_;
2505	if (!$saw{$address}) {
2506	    if ($output_roles) {
2507		push(@lines, "$address ($role)");
2508	    } else {
2509		push(@lines, $address);
2510	    }
2511	    $saw{$address} = 1;
2512	}
2513    }
2514
2515    return @lines;
2516}
2517
2518sub output {
2519    my (@parms) = @_;
2520
2521    if ($output_multiline) {
2522	foreach my $line (@parms) {
2523	    print("${line}\n");
2524	}
2525    } else {
2526	print(join($output_separator, @parms));
2527	print("\n");
2528    }
2529}
2530
2531my $rfc822re;
2532
2533sub make_rfc822re {
2534#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2535#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2536#   This regexp will only work on addresses which have had comments stripped
2537#   and replaced with rfc822_lwsp.
2538
2539    my $specials = '()<>@,;:\\\\".\\[\\]';
2540    my $controls = '\\000-\\037\\177';
2541
2542    my $dtext = "[^\\[\\]\\r\\\\]";
2543    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2544
2545    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2546
2547#   Use zero-width assertion to spot the limit of an atom.  A simple
2548#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2549    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2550    my $word = "(?:$atom|$quoted_string)";
2551    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2552
2553    my $sub_domain = "(?:$atom|$domain_literal)";
2554    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2555
2556    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2557
2558    my $phrase = "$word*";
2559    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2560    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2561    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2562
2563    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2564    my $address = "(?:$mailbox|$group)";
2565
2566    return "$rfc822_lwsp*$address";
2567}
2568
2569sub rfc822_strip_comments {
2570    my $s = shift;
2571#   Recursively remove comments, and replace with a single space.  The simpler
2572#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2573#   chars in atoms, for example.
2574
2575    while ($s =~ s/^((?:[^"\\]|\\.)*
2576                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2577                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2578    return $s;
2579}
2580
2581#   valid: returns true if the parameter is an RFC822 valid address
2582#
2583sub rfc822_valid {
2584    my $s = rfc822_strip_comments(shift);
2585
2586    if (!$rfc822re) {
2587        $rfc822re = make_rfc822re();
2588    }
2589
2590    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2591}
2592
2593#   validlist: In scalar context, returns true if the parameter is an RFC822
2594#              valid list of addresses.
2595#
2596#              In list context, returns an empty list on failure (an invalid
2597#              address was found); otherwise a list whose first element is the
2598#              number of addresses found and whose remaining elements are the
2599#              addresses.  This is needed to disambiguate failure (invalid)
2600#              from success with no addresses found, because an empty string is
2601#              a valid list.
2602
2603sub rfc822_validlist {
2604    my $s = rfc822_strip_comments(shift);
2605
2606    if (!$rfc822re) {
2607        $rfc822re = make_rfc822re();
2608    }
2609    # * null list items are valid according to the RFC
2610    # * the '1' business is to aid in distinguishing failure from no results
2611
2612    my @r;
2613    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2614	$s =~ m/^$rfc822_char*$/) {
2615        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2616            push(@r, $1);
2617        }
2618        return wantarray ? (scalar(@r), @r) : 1;
2619    }
2620    return wantarray ? () : 0;
2621}
2622