xref: /openbmc/linux/scripts/get_maintainer.pl (revision f9834f18)
1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
4# (c) 2007, Joe Perches <joe@perches.com>
5#           created from checkpatch.pl
6#
7# Print selected MAINTAINERS information for
8# the files modified in a patch or for a file
9#
10# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
11#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
12
13use warnings;
14use strict;
15
16my $P = $0;
17my $V = '0.26';
18
19use Getopt::Long qw(:config no_auto_abbrev);
20use Cwd;
21use File::Find;
22
23my $cur_path = fastgetcwd() . '/';
24my $lk_path = "./";
25my $email = 1;
26my $email_usename = 1;
27my $email_maintainer = 1;
28my $email_reviewer = 1;
29my $email_fixes = 1;
30my $email_list = 1;
31my $email_moderated_list = 1;
32my $email_subscriber_list = 0;
33my $email_git_penguin_chiefs = 0;
34my $email_git = 0;
35my $email_git_all_signature_types = 0;
36my $email_git_blame = 0;
37my $email_git_blame_signatures = 1;
38my $email_git_fallback = 1;
39my $email_git_min_signatures = 1;
40my $email_git_max_maintainers = 5;
41my $email_git_min_percent = 5;
42my $email_git_since = "1-year-ago";
43my $email_hg_since = "-365";
44my $interactive = 0;
45my $email_remove_duplicates = 1;
46my $email_use_mailmap = 1;
47my $output_multiline = 1;
48my $output_separator = ", ";
49my $output_roles = 0;
50my $output_rolestats = 1;
51my $output_section_maxlen = 50;
52my $scm = 0;
53my $tree = 1;
54my $web = 0;
55my $subsystem = 0;
56my $status = 0;
57my $letters = "";
58my $keywords = 1;
59my $sections = 0;
60my $file_emails = 0;
61my $from_filename = 0;
62my $pattern_depth = 0;
63my $self_test = undef;
64my $version = 0;
65my $help = 0;
66my $find_maintainer_files = 0;
67my $maintainer_path;
68my $vcs_used = 0;
69
70my $exit = 0;
71
72my %commit_author_hash;
73my %commit_signer_hash;
74
75my @penguin_chief = ();
76push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
77#Andrew wants in on most everything - 2009/01/14
78#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
79
80my @penguin_chief_names = ();
81foreach my $chief (@penguin_chief) {
82    if ($chief =~ m/^(.*):(.*)/) {
83	my $chief_name = $1;
84	my $chief_addr = $2;
85	push(@penguin_chief_names, $chief_name);
86    }
87}
88my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
89
90# Signature types of people who are either
91# 	a) responsible for the code in question, or
92# 	b) familiar enough with it to give relevant feedback
93my @signature_tags = ();
94push(@signature_tags, "Signed-off-by:");
95push(@signature_tags, "Reviewed-by:");
96push(@signature_tags, "Acked-by:");
97
98my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
99
100# rfc822 email address - preloaded methods go here.
101my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
102my $rfc822_char = '[\\000-\\377]';
103
104# VCS command support: class-like functions and strings
105
106my %VCS_cmds;
107
108my %VCS_cmds_git = (
109    "execute_cmd" => \&git_execute_cmd,
110    "available" => '(which("git") ne "") && (-e ".git")',
111    "find_signers_cmd" =>
112	"git log --no-color --follow --since=\$email_git_since " .
113	    '--numstat --no-merges ' .
114	    '--format="GitCommit: %H%n' .
115		      'GitAuthor: %an <%ae>%n' .
116		      'GitDate: %aD%n' .
117		      'GitSubject: %s%n' .
118		      '%b%n"' .
119	    " -- \$file",
120    "find_commit_signers_cmd" =>
121	"git log --no-color " .
122	    '--numstat ' .
123	    '--format="GitCommit: %H%n' .
124		      'GitAuthor: %an <%ae>%n' .
125		      'GitDate: %aD%n' .
126		      'GitSubject: %s%n' .
127		      '%b%n"' .
128	    " -1 \$commit",
129    "find_commit_author_cmd" =>
130	"git log --no-color " .
131	    '--numstat ' .
132	    '--format="GitCommit: %H%n' .
133		      'GitAuthor: %an <%ae>%n' .
134		      'GitDate: %aD%n' .
135		      'GitSubject: %s%n"' .
136	    " -1 \$commit",
137    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
138    "blame_file_cmd" => "git blame -l \$file",
139    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
140    "blame_commit_pattern" => "^([0-9a-f]+) ",
141    "author_pattern" => "^GitAuthor: (.*)",
142    "subject_pattern" => "^GitSubject: (.*)",
143    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
144    "file_exists_cmd" => "git ls-files \$file",
145    "list_files_cmd" => "git ls-files \$file",
146);
147
148my %VCS_cmds_hg = (
149    "execute_cmd" => \&hg_execute_cmd,
150    "available" => '(which("hg") ne "") && (-d ".hg")',
151    "find_signers_cmd" =>
152	"hg log --date=\$email_hg_since " .
153	    "--template='HgCommit: {node}\\n" .
154	                "HgAuthor: {author}\\n" .
155			"HgSubject: {desc}\\n'" .
156	    " -- \$file",
157    "find_commit_signers_cmd" =>
158	"hg log " .
159	    "--template='HgSubject: {desc}\\n'" .
160	    " -r \$commit",
161    "find_commit_author_cmd" =>
162	"hg log " .
163	    "--template='HgCommit: {node}\\n" .
164		        "HgAuthor: {author}\\n" .
165			"HgSubject: {desc|firstline}\\n'" .
166	    " -r \$commit",
167    "blame_range_cmd" => "",		# not supported
168    "blame_file_cmd" => "hg blame -n \$file",
169    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
170    "blame_commit_pattern" => "^([ 0-9a-f]+):",
171    "author_pattern" => "^HgAuthor: (.*)",
172    "subject_pattern" => "^HgSubject: (.*)",
173    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
174    "file_exists_cmd" => "hg files \$file",
175    "list_files_cmd" => "hg manifest -R \$file",
176);
177
178my $conf = which_conf(".get_maintainer.conf");
179if (-f $conf) {
180    my @conf_args;
181    open(my $conffile, '<', "$conf")
182	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
183
184    while (<$conffile>) {
185	my $line = $_;
186
187	$line =~ s/\s*\n?$//g;
188	$line =~ s/^\s*//g;
189	$line =~ s/\s+/ /g;
190
191	next if ($line =~ m/^\s*#/);
192	next if ($line =~ m/^\s*$/);
193
194	my @words = split(" ", $line);
195	foreach my $word (@words) {
196	    last if ($word =~ m/^#/);
197	    push (@conf_args, $word);
198	}
199    }
200    close($conffile);
201    unshift(@ARGV, @conf_args) if @conf_args;
202}
203
204my @ignore_emails = ();
205my $ignore_file = which_conf(".get_maintainer.ignore");
206if (-f $ignore_file) {
207    open(my $ignore, '<', "$ignore_file")
208	or warn "$P: Can't find a readable .get_maintainer.ignore file $!\n";
209    while (<$ignore>) {
210	my $line = $_;
211
212	$line =~ s/\s*\n?$//;
213	$line =~ s/^\s*//;
214	$line =~ s/\s+$//;
215	$line =~ s/#.*$//;
216
217	next if ($line =~ m/^\s*$/);
218	if (rfc822_valid($line)) {
219	    push(@ignore_emails, $line);
220	}
221    }
222    close($ignore);
223}
224
225if ($#ARGV > 0) {
226    foreach (@ARGV) {
227        if ($_ =~ /^-{1,2}self-test(?:=|$)/) {
228            die "$P: using --self-test does not allow any other option or argument\n";
229        }
230    }
231}
232
233if (!GetOptions(
234		'email!' => \$email,
235		'git!' => \$email_git,
236		'git-all-signature-types!' => \$email_git_all_signature_types,
237		'git-blame!' => \$email_git_blame,
238		'git-blame-signatures!' => \$email_git_blame_signatures,
239		'git-fallback!' => \$email_git_fallback,
240		'git-chief-penguins!' => \$email_git_penguin_chiefs,
241		'git-min-signatures=i' => \$email_git_min_signatures,
242		'git-max-maintainers=i' => \$email_git_max_maintainers,
243		'git-min-percent=i' => \$email_git_min_percent,
244		'git-since=s' => \$email_git_since,
245		'hg-since=s' => \$email_hg_since,
246		'i|interactive!' => \$interactive,
247		'remove-duplicates!' => \$email_remove_duplicates,
248		'mailmap!' => \$email_use_mailmap,
249		'm!' => \$email_maintainer,
250		'r!' => \$email_reviewer,
251		'n!' => \$email_usename,
252		'l!' => \$email_list,
253		'fixes!' => \$email_fixes,
254		'moderated!' => \$email_moderated_list,
255		's!' => \$email_subscriber_list,
256		'multiline!' => \$output_multiline,
257		'roles!' => \$output_roles,
258		'rolestats!' => \$output_rolestats,
259		'separator=s' => \$output_separator,
260		'subsystem!' => \$subsystem,
261		'status!' => \$status,
262		'scm!' => \$scm,
263		'tree!' => \$tree,
264		'web!' => \$web,
265		'letters=s' => \$letters,
266		'pattern-depth=i' => \$pattern_depth,
267		'k|keywords!' => \$keywords,
268		'sections!' => \$sections,
269		'fe|file-emails!' => \$file_emails,
270		'f|file' => \$from_filename,
271		'find-maintainer-files' => \$find_maintainer_files,
272		'mpath|maintainer-path=s' => \$maintainer_path,
273		'self-test:s' => \$self_test,
274		'v|version' => \$version,
275		'h|help|usage' => \$help,
276		)) {
277    die "$P: invalid argument - use --help if necessary\n";
278}
279
280if ($help != 0) {
281    usage();
282    exit 0;
283}
284
285if ($version != 0) {
286    print("${P} ${V}\n");
287    exit 0;
288}
289
290if (defined $self_test) {
291    read_all_maintainer_files();
292    self_test();
293    exit 0;
294}
295
296if (-t STDIN && !@ARGV) {
297    # We're talking to a terminal, but have no command line arguments.
298    die "$P: missing patchfile or -f file - use --help if necessary\n";
299}
300
301$output_multiline = 0 if ($output_separator ne ", ");
302$output_rolestats = 1 if ($interactive);
303$output_roles = 1 if ($output_rolestats);
304
305if ($sections || $letters ne "") {
306    $sections = 1;
307    $email = 0;
308    $email_list = 0;
309    $scm = 0;
310    $status = 0;
311    $subsystem = 0;
312    $web = 0;
313    $keywords = 0;
314    $interactive = 0;
315} else {
316    my $selections = $email + $scm + $status + $subsystem + $web;
317    if ($selections == 0) {
318	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
319    }
320}
321
322if ($email &&
323    ($email_maintainer + $email_reviewer +
324     $email_list + $email_subscriber_list +
325     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
326    die "$P: Please select at least 1 email option\n";
327}
328
329if ($tree && !top_of_kernel_tree($lk_path)) {
330    die "$P: The current directory does not appear to be "
331	. "a linux kernel source tree.\n";
332}
333
334## Read MAINTAINERS for type/value pairs
335
336my @typevalue = ();
337my %keyword_hash;
338my @mfiles = ();
339my @self_test_info = ();
340
341sub read_maintainer_file {
342    my ($file) = @_;
343
344    open (my $maint, '<', "$file")
345	or die "$P: Can't open MAINTAINERS file '$file': $!\n";
346    my $i = 1;
347    while (<$maint>) {
348	my $line = $_;
349	chomp $line;
350
351	if ($line =~ m/^([A-Z]):\s*(.*)/) {
352	    my $type = $1;
353	    my $value = $2;
354
355	    ##Filename pattern matching
356	    if ($type eq "F" || $type eq "X") {
357		$value =~ s@\.@\\\.@g;       ##Convert . to \.
358		$value =~ s/\*/\.\*/g;       ##Convert * to .*
359		$value =~ s/\?/\./g;         ##Convert ? to .
360		##if pattern is a directory and it lacks a trailing slash, add one
361		if ((-d $value)) {
362		    $value =~ s@([^/])$@$1/@;
363		}
364	    } elsif ($type eq "K") {
365		$keyword_hash{@typevalue} = $value;
366	    }
367	    push(@typevalue, "$type:$value");
368	} elsif (!(/^\s*$/ || /^\s*\#/)) {
369	    push(@typevalue, $line);
370	}
371	if (defined $self_test) {
372	    push(@self_test_info, {file=>$file, linenr=>$i, line=>$line});
373	}
374	$i++;
375    }
376    close($maint);
377}
378
379sub find_is_maintainer_file {
380    my ($file) = $_;
381    return if ($file !~ m@/MAINTAINERS$@);
382    $file = $File::Find::name;
383    return if (! -f $file);
384    push(@mfiles, $file);
385}
386
387sub find_ignore_git {
388    return grep { $_ !~ /^\.git$/; } @_;
389}
390
391read_all_maintainer_files();
392
393sub read_all_maintainer_files {
394    my $path = "${lk_path}MAINTAINERS";
395    if (defined $maintainer_path) {
396	$path = $maintainer_path;
397	# Perl Cookbook tilde expansion if necessary
398	$path =~ s@^~([^/]*)@ $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($<))[7])@ex;
399    }
400
401    if (-d $path) {
402	$path .= '/' if ($path !~ m@/$@);
403	if ($find_maintainer_files) {
404	    find( { wanted => \&find_is_maintainer_file,
405		    preprocess => \&find_ignore_git,
406		    no_chdir => 1,
407		}, "$path");
408	} else {
409	    opendir(DIR, "$path") or die $!;
410	    my @files = readdir(DIR);
411	    closedir(DIR);
412	    foreach my $file (@files) {
413		push(@mfiles, "$path$file") if ($file !~ /^\./);
414	    }
415	}
416    } elsif (-f "$path") {
417	push(@mfiles, "$path");
418    } else {
419	die "$P: MAINTAINER file not found '$path'\n";
420    }
421    die "$P: No MAINTAINER files found in '$path'\n" if (scalar(@mfiles) == 0);
422    foreach my $file (@mfiles) {
423	read_maintainer_file("$file");
424    }
425}
426
427#
428# Read mail address map
429#
430
431my $mailmap;
432
433read_mailmap();
434
435sub read_mailmap {
436    $mailmap = {
437	names => {},
438	addresses => {}
439    };
440
441    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
442
443    open(my $mailmap_file, '<', "${lk_path}.mailmap")
444	or warn "$P: Can't open .mailmap: $!\n";
445
446    while (<$mailmap_file>) {
447	s/#.*$//; #strip comments
448	s/^\s+|\s+$//g; #trim
449
450	next if (/^\s*$/); #skip empty lines
451	#entries have one of the following formats:
452	# name1 <mail1>
453	# <mail1> <mail2>
454	# name1 <mail1> <mail2>
455	# name1 <mail1> name2 <mail2>
456	# (see man git-shortlog)
457
458	if (/^([^<]+)<([^>]+)>$/) {
459	    my $real_name = $1;
460	    my $address = $2;
461
462	    $real_name =~ s/\s+$//;
463	    ($real_name, $address) = parse_email("$real_name <$address>");
464	    $mailmap->{names}->{$address} = $real_name;
465
466	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
467	    my $real_address = $1;
468	    my $wrong_address = $2;
469
470	    $mailmap->{addresses}->{$wrong_address} = $real_address;
471
472	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
473	    my $real_name = $1;
474	    my $real_address = $2;
475	    my $wrong_address = $3;
476
477	    $real_name =~ s/\s+$//;
478	    ($real_name, $real_address) =
479		parse_email("$real_name <$real_address>");
480	    $mailmap->{names}->{$wrong_address} = $real_name;
481	    $mailmap->{addresses}->{$wrong_address} = $real_address;
482
483	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
484	    my $real_name = $1;
485	    my $real_address = $2;
486	    my $wrong_name = $3;
487	    my $wrong_address = $4;
488
489	    $real_name =~ s/\s+$//;
490	    ($real_name, $real_address) =
491		parse_email("$real_name <$real_address>");
492
493	    $wrong_name =~ s/\s+$//;
494	    ($wrong_name, $wrong_address) =
495		parse_email("$wrong_name <$wrong_address>");
496
497	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
498	    $mailmap->{names}->{$wrong_email} = $real_name;
499	    $mailmap->{addresses}->{$wrong_email} = $real_address;
500	}
501    }
502    close($mailmap_file);
503}
504
505## use the filenames on the command line or find the filenames in the patchfiles
506
507my @files = ();
508my @fixes = ();			# If a patch description includes Fixes: lines
509my @range = ();
510my @keyword_tvi = ();
511my @file_emails = ();
512
513if (!@ARGV) {
514    push(@ARGV, "&STDIN");
515}
516
517foreach my $file (@ARGV) {
518    if ($file ne "&STDIN") {
519	##if $file is a directory and it lacks a trailing slash, add one
520	if ((-d $file)) {
521	    $file =~ s@([^/])$@$1/@;
522	} elsif (!(-f $file)) {
523	    die "$P: file '${file}' not found\n";
524	}
525    }
526    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
527	$file =~ s/^\Q${cur_path}\E//;	#strip any absolute path
528	$file =~ s/^\Q${lk_path}\E//;	#or the path to the lk tree
529	push(@files, $file);
530	if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
531	    open(my $f, '<', $file)
532		or die "$P: Can't open $file: $!\n";
533	    my $text = do { local($/) ; <$f> };
534	    close($f);
535	    if ($keywords) {
536		foreach my $line (keys %keyword_hash) {
537		    if ($text =~ m/$keyword_hash{$line}/x) {
538			push(@keyword_tvi, $line);
539		    }
540		}
541	    }
542	    if ($file_emails) {
543		my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
544		push(@file_emails, clean_file_emails(@poss_addr));
545	    }
546	}
547    } else {
548	my $file_cnt = @files;
549	my $lastfile;
550
551	open(my $patch, "< $file")
552	    or die "$P: Can't open $file: $!\n";
553
554	# We can check arbitrary information before the patch
555	# like the commit message, mail headers, etc...
556	# This allows us to match arbitrary keywords against any part
557	# of a git format-patch generated file (subject tags, etc...)
558
559	my $patch_prefix = "";			#Parsing the intro
560
561	while (<$patch>) {
562	    my $patch_line = $_;
563	    if (m/^ mode change [0-7]+ => [0-7]+ (\S+)\s*$/) {
564		my $filename = $1;
565		push(@files, $filename);
566	    } elsif (m/^rename (?:from|to) (\S+)\s*$/) {
567		my $filename = $1;
568		push(@files, $filename);
569	    } elsif (m/^diff --git a\/(\S+) b\/(\S+)\s*$/) {
570		my $filename1 = $1;
571		my $filename2 = $2;
572		push(@files, $filename1);
573		push(@files, $filename2);
574	    } elsif (m/^Fixes:\s+([0-9a-fA-F]{6,40})/) {
575		push(@fixes, $1) if ($email_fixes);
576	    } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
577		my $filename = $1;
578		$filename =~ s@^[^/]*/@@;
579		$filename =~ s@\n@@;
580		$lastfile = $filename;
581		push(@files, $filename);
582		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
583	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
584		if ($email_git_blame) {
585		    push(@range, "$lastfile:$1:$2");
586		}
587	    } elsif ($keywords) {
588		foreach my $line (keys %keyword_hash) {
589		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
590			push(@keyword_tvi, $line);
591		    }
592		}
593	    }
594	}
595	close($patch);
596
597	if ($file_cnt == @files) {
598	    warn "$P: file '${file}' doesn't appear to be a patch.  "
599		. "Add -f to options?\n";
600	}
601	@files = sort_and_uniq(@files);
602    }
603}
604
605@file_emails = uniq(@file_emails);
606@fixes = uniq(@fixes);
607
608my %email_hash_name;
609my %email_hash_address;
610my @email_to = ();
611my %hash_list_to;
612my @list_to = ();
613my @scm = ();
614my @web = ();
615my @subsystem = ();
616my @status = ();
617my %deduplicate_name_hash = ();
618my %deduplicate_address_hash = ();
619
620my @maintainers = get_maintainers();
621if (@maintainers) {
622    @maintainers = merge_email(@maintainers);
623    output(@maintainers);
624}
625
626if ($scm) {
627    @scm = uniq(@scm);
628    output(@scm);
629}
630
631if ($status) {
632    @status = uniq(@status);
633    output(@status);
634}
635
636if ($subsystem) {
637    @subsystem = uniq(@subsystem);
638    output(@subsystem);
639}
640
641if ($web) {
642    @web = uniq(@web);
643    output(@web);
644}
645
646exit($exit);
647
648sub self_test {
649    my @lsfiles = ();
650    my @good_links = ();
651    my @bad_links = ();
652    my @section_headers = ();
653    my $index = 0;
654
655    @lsfiles = vcs_list_files($lk_path);
656
657    for my $x (@self_test_info) {
658	$index++;
659
660	## Section header duplication and missing section content
661	if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
662	    $x->{line} =~ /^\S[^:]/ &&
663	    defined $self_test_info[$index] &&
664	    $self_test_info[$index]->{line} =~ /^([A-Z]):\s*\S/) {
665	    my $has_S = 0;
666	    my $has_F = 0;
667	    my $has_ML = 0;
668	    my $status = "";
669	    if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
670		print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n");
671	    } else {
672		push(@section_headers, $x->{line});
673	    }
674	    my $nextline = $index;
675	    while (defined $self_test_info[$nextline] &&
676		   $self_test_info[$nextline]->{line} =~ /^([A-Z]):\s*(\S.*)/) {
677		my $type = $1;
678		my $value = $2;
679		if ($type eq "S") {
680		    $has_S = 1;
681		    $status = $value;
682		} elsif ($type eq "F" || $type eq "N") {
683		    $has_F = 1;
684		} elsif ($type eq "M" || $type eq "R" || $type eq "L") {
685		    $has_ML = 1;
686		}
687		$nextline++;
688	    }
689	    if (!$has_ML && $status !~ /orphan|obsolete/i) {
690		print("$x->{file}:$x->{linenr}: warning: section without email address\t$x->{line}\n");
691	    }
692	    if (!$has_S) {
693		print("$x->{file}:$x->{linenr}: warning: section without status \t$x->{line}\n");
694	    }
695	    if (!$has_F) {
696		print("$x->{file}:$x->{linenr}: warning: section without file pattern\t$x->{line}\n");
697	    }
698	}
699
700	next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);
701
702	my $type = $1;
703	my $value = $2;
704
705	## Filename pattern matching
706	if (($type eq "F" || $type eq "X") &&
707	    ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
708	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
709	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
710	    $value =~ s/\?/\./g;         ##Convert ? to .
711	    ##if pattern is a directory and it lacks a trailing slash, add one
712	    if ((-d $value)) {
713		$value =~ s@([^/])$@$1/@;
714	    }
715	    if (!grep(m@^$value@, @lsfiles)) {
716		print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n");
717	    }
718
719	## Link reachability
720	} elsif (($type eq "W" || $type eq "Q" || $type eq "B") &&
721		 $value =~ /^https?:/ &&
722		 ($self_test eq "" || $self_test =~ /\blinks\b/)) {
723	    next if (grep(m@^\Q$value\E$@, @good_links));
724	    my $isbad = 0;
725	    if (grep(m@^\Q$value\E$@, @bad_links)) {
726	        $isbad = 1;
727	    } else {
728		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`;
729		if ($? == 0) {
730		    push(@good_links, $value);
731		} else {
732		    push(@bad_links, $value);
733		    $isbad = 1;
734		}
735	    }
736	    if ($isbad) {
737	        print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
738	    }
739
740	## SCM reachability
741	} elsif ($type eq "T" &&
742		 ($self_test eq "" || $self_test =~ /\bscm\b/)) {
743	    next if (grep(m@^\Q$value\E$@, @good_links));
744	    my $isbad = 0;
745	    if (grep(m@^\Q$value\E$@, @bad_links)) {
746	        $isbad = 1;
747            } elsif ($value !~ /^(?:git|quilt|hg)\s+\S/) {
748		print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n");
749	    } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {
750		my $url = $1;
751		my $branch = "";
752		$branch = $3 if $3;
753		my $output = `git ls-remote --exit-code -h "$url" $branch > /dev/null 2>&1`;
754		if ($? == 0) {
755		    push(@good_links, $value);
756		} else {
757		    push(@bad_links, $value);
758		    $isbad = 1;
759		}
760	    } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
761		my $url = $1;
762		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
763		if ($? == 0) {
764		    push(@good_links, $value);
765		} else {
766		    push(@bad_links, $value);
767		    $isbad = 1;
768		}
769	    }
770	    if ($isbad) {
771		print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
772	    }
773	}
774    }
775}
776
777sub ignore_email_address {
778    my ($address) = @_;
779
780    foreach my $ignore (@ignore_emails) {
781	return 1 if ($ignore eq $address);
782    }
783
784    return 0;
785}
786
787sub range_is_maintained {
788    my ($start, $end) = @_;
789
790    for (my $i = $start; $i < $end; $i++) {
791	my $line = $typevalue[$i];
792	if ($line =~ m/^([A-Z]):\s*(.*)/) {
793	    my $type = $1;
794	    my $value = $2;
795	    if ($type eq 'S') {
796		if ($value =~ /(maintain|support)/i) {
797		    return 1;
798		}
799	    }
800	}
801    }
802    return 0;
803}
804
805sub range_has_maintainer {
806    my ($start, $end) = @_;
807
808    for (my $i = $start; $i < $end; $i++) {
809	my $line = $typevalue[$i];
810	if ($line =~ m/^([A-Z]):\s*(.*)/) {
811	    my $type = $1;
812	    my $value = $2;
813	    if ($type eq 'M') {
814		return 1;
815	    }
816	}
817    }
818    return 0;
819}
820
821sub get_maintainers {
822    %email_hash_name = ();
823    %email_hash_address = ();
824    %commit_author_hash = ();
825    %commit_signer_hash = ();
826    @email_to = ();
827    %hash_list_to = ();
828    @list_to = ();
829    @scm = ();
830    @web = ();
831    @subsystem = ();
832    @status = ();
833    %deduplicate_name_hash = ();
834    %deduplicate_address_hash = ();
835    if ($email_git_all_signature_types) {
836	$signature_pattern = "(.+?)[Bb][Yy]:";
837    } else {
838	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
839    }
840
841    # Find responsible parties
842
843    my %exact_pattern_match_hash = ();
844
845    foreach my $file (@files) {
846
847	my %hash;
848	my $tvi = find_first_section();
849	while ($tvi < @typevalue) {
850	    my $start = find_starting_index($tvi);
851	    my $end = find_ending_index($tvi);
852	    my $exclude = 0;
853	    my $i;
854
855	    #Do not match excluded file patterns
856
857	    for ($i = $start; $i < $end; $i++) {
858		my $line = $typevalue[$i];
859		if ($line =~ m/^([A-Z]):\s*(.*)/) {
860		    my $type = $1;
861		    my $value = $2;
862		    if ($type eq 'X') {
863			if (file_match_pattern($file, $value)) {
864			    $exclude = 1;
865			    last;
866			}
867		    }
868		}
869	    }
870
871	    if (!$exclude) {
872		for ($i = $start; $i < $end; $i++) {
873		    my $line = $typevalue[$i];
874		    if ($line =~ m/^([A-Z]):\s*(.*)/) {
875			my $type = $1;
876			my $value = $2;
877			if ($type eq 'F') {
878			    if (file_match_pattern($file, $value)) {
879				my $value_pd = ($value =~ tr@/@@);
880				my $file_pd = ($file  =~ tr@/@@);
881				$value_pd++ if (substr($value,-1,1) ne "/");
882				$value_pd = -1 if ($value =~ /^\.\*/);
883				if ($value_pd >= $file_pd &&
884				    range_is_maintained($start, $end) &&
885				    range_has_maintainer($start, $end)) {
886				    $exact_pattern_match_hash{$file} = 1;
887				}
888				if ($pattern_depth == 0 ||
889				    (($file_pd - $value_pd) < $pattern_depth)) {
890				    $hash{$tvi} = $value_pd;
891				}
892			    }
893			} elsif ($type eq 'N') {
894			    if ($file =~ m/$value/x) {
895				$hash{$tvi} = 0;
896			    }
897			}
898		    }
899		}
900	    }
901	    $tvi = $end + 1;
902	}
903
904	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
905	    add_categories($line);
906	    if ($sections) {
907		my $i;
908		my $start = find_starting_index($line);
909		my $end = find_ending_index($line);
910		for ($i = $start; $i < $end; $i++) {
911		    my $line = $typevalue[$i];
912		    if ($line =~ /^[FX]:/) {		##Restore file patterns
913			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
914			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
915			$line =~ s/\\\./\./g;       	##Convert \. to .
916			$line =~ s/\.\*/\*/g;       	##Convert .* to *
917		    }
918		    my $count = $line =~ s/^([A-Z]):/$1:\t/g;
919		    if ($letters eq "" || (!$count || $letters =~ /$1/i)) {
920			print("$line\n");
921		    }
922		}
923		print("\n");
924	    }
925	}
926    }
927
928    if ($keywords) {
929	@keyword_tvi = sort_and_uniq(@keyword_tvi);
930	foreach my $line (@keyword_tvi) {
931	    add_categories($line);
932	}
933    }
934
935    foreach my $email (@email_to, @list_to) {
936	$email->[0] = deduplicate_email($email->[0]);
937    }
938
939    foreach my $file (@files) {
940	if ($email &&
941	    ($email_git || ($email_git_fallback &&
942			    !$exact_pattern_match_hash{$file}))) {
943	    vcs_file_signoffs($file);
944	}
945	if ($email && $email_git_blame) {
946	    vcs_file_blame($file);
947	}
948    }
949
950    if ($email) {
951	foreach my $chief (@penguin_chief) {
952	    if ($chief =~ m/^(.*):(.*)/) {
953		my $email_address;
954
955		$email_address = format_email($1, $2, $email_usename);
956		if ($email_git_penguin_chiefs) {
957		    push(@email_to, [$email_address, 'chief penguin']);
958		} else {
959		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
960		}
961	    }
962	}
963
964	foreach my $email (@file_emails) {
965	    my ($name, $address) = parse_email($email);
966
967	    my $tmp_email = format_email($name, $address, $email_usename);
968	    push_email_address($tmp_email, '');
969	    add_role($tmp_email, 'in file');
970	}
971    }
972
973    foreach my $fix (@fixes) {
974	vcs_add_commit_signers($fix, "blamed_fixes");
975    }
976
977    my @to = ();
978    if ($email || $email_list) {
979	if ($email) {
980	    @to = (@to, @email_to);
981	}
982	if ($email_list) {
983	    @to = (@to, @list_to);
984	}
985    }
986
987    if ($interactive) {
988	@to = interactive_get_maintainers(\@to);
989    }
990
991    return @to;
992}
993
994sub file_match_pattern {
995    my ($file, $pattern) = @_;
996    if (substr($pattern, -1) eq "/") {
997	if ($file =~ m@^$pattern@) {
998	    return 1;
999	}
1000    } else {
1001	if ($file =~ m@^$pattern@) {
1002	    my $s1 = ($file =~ tr@/@@);
1003	    my $s2 = ($pattern =~ tr@/@@);
1004	    if ($s1 == $s2) {
1005		return 1;
1006	    }
1007	}
1008    }
1009    return 0;
1010}
1011
1012sub usage {
1013    print <<EOT;
1014usage: $P [options] patchfile
1015       $P [options] -f file|directory
1016version: $V
1017
1018MAINTAINER field selection options:
1019  --email => print email address(es) if any
1020    --git => include recent git \*-by: signers
1021    --git-all-signature-types => include signers regardless of signature type
1022        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
1023    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
1024    --git-chief-penguins => include ${penguin_chiefs}
1025    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
1026    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
1027    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
1028    --git-blame => use git blame to find modified commits for patch or file
1029    --git-blame-signatures => when used with --git-blame, also include all commit signers
1030    --git-since => git history to use (default: $email_git_since)
1031    --hg-since => hg history to use (default: $email_hg_since)
1032    --interactive => display a menu (mostly useful if used with the --git option)
1033    --m => include maintainer(s) if any
1034    --r => include reviewer(s) if any
1035    --n => include name 'Full Name <addr\@domain.tld>'
1036    --l => include list(s) if any
1037    --moderated => include moderated lists(s) if any (default: true)
1038    --s => include subscriber only list(s) if any (default: false)
1039    --remove-duplicates => minimize duplicate email names/addresses
1040    --roles => show roles (status:subsystem, git-signer, list, etc...)
1041    --rolestats => show roles and statistics (commits/total_commits, %)
1042    --file-emails => add email addresses found in -f file (default: 0 (off))
1043    --fixes => for patches, add signatures of commits with 'Fixes: <commit>' (default: 1 (on))
1044  --scm => print SCM tree(s) if any
1045  --status => print status if any
1046  --subsystem => print subsystem name if any
1047  --web => print website(s) if any
1048
1049Output type options:
1050  --separator [, ] => separator for multiple entries on 1 line
1051    using --separator also sets --nomultiline if --separator is not [, ]
1052  --multiline => print 1 entry per line
1053
1054Other options:
1055  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
1056  --keywords => scan patch for keywords (default: $keywords)
1057  --sections => print all of the subsystem sections with pattern matches
1058  --letters => print all matching 'letter' types from all matching sections
1059  --mailmap => use .mailmap file (default: $email_use_mailmap)
1060  --no-tree => run without a kernel tree
1061  --self-test => show potential issues with MAINTAINERS file content
1062  --version => show version
1063  --help => show this help information
1064
1065Default options:
1066  [--email --tree --nogit --git-fallback --m --r --n --l --multiline
1067   --pattern-depth=0 --remove-duplicates --rolestats]
1068
1069Notes:
1070  Using "-f directory" may give unexpected results:
1071      Used with "--git", git signators for _all_ files in and below
1072          directory are examined as git recurses directories.
1073          Any specified X: (exclude) pattern matches are _not_ ignored.
1074      Used with "--nogit", directory is used as a pattern match,
1075          no individual file within the directory or subdirectory
1076          is matched.
1077      Used with "--git-blame", does not iterate all files in directory
1078  Using "--git-blame" is slow and may add old committers and authors
1079      that are no longer active maintainers to the output.
1080  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
1081      other automated tools that expect only ["name"] <email address>
1082      may not work because of additional output after <email address>.
1083  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
1084      not the percentage of the entire file authored.  # of commits is
1085      not a good measure of amount of code authored.  1 major commit may
1086      contain a thousand lines, 5 trivial commits may modify a single line.
1087  If git is not installed, but mercurial (hg) is installed and an .hg
1088      repository exists, the following options apply to mercurial:
1089          --git,
1090          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
1091          --git-blame
1092      Use --hg-since not --git-since to control date selection
1093  File ".get_maintainer.conf", if it exists in the linux kernel source root
1094      directory, can change whatever get_maintainer defaults are desired.
1095      Entries in this file can be any command line argument.
1096      This file is prepended to any additional command line arguments.
1097      Multiple lines and # comments are allowed.
1098  Most options have both positive and negative forms.
1099      The negative forms for --<foo> are --no<foo> and --no-<foo>.
1100
1101EOT
1102}
1103
1104sub top_of_kernel_tree {
1105    my ($lk_path) = @_;
1106
1107    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
1108	$lk_path .= "/";
1109    }
1110    if (   (-f "${lk_path}COPYING")
1111	&& (-f "${lk_path}CREDITS")
1112	&& (-f "${lk_path}Kbuild")
1113	&& (-e "${lk_path}MAINTAINERS")
1114	&& (-f "${lk_path}Makefile")
1115	&& (-f "${lk_path}README")
1116	&& (-d "${lk_path}Documentation")
1117	&& (-d "${lk_path}arch")
1118	&& (-d "${lk_path}include")
1119	&& (-d "${lk_path}drivers")
1120	&& (-d "${lk_path}fs")
1121	&& (-d "${lk_path}init")
1122	&& (-d "${lk_path}ipc")
1123	&& (-d "${lk_path}kernel")
1124	&& (-d "${lk_path}lib")
1125	&& (-d "${lk_path}scripts")) {
1126	return 1;
1127    }
1128    return 0;
1129}
1130
1131sub parse_email {
1132    my ($formatted_email) = @_;
1133
1134    my $name = "";
1135    my $address = "";
1136
1137    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
1138	$name = $1;
1139	$address = $2;
1140    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
1141	$address = $1;
1142    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
1143	$address = $1;
1144    }
1145
1146    $name =~ s/^\s+|\s+$//g;
1147    $name =~ s/^\"|\"$//g;
1148    $address =~ s/^\s+|\s+$//g;
1149
1150    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
1151	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1152	$name = "\"$name\"";
1153    }
1154
1155    return ($name, $address);
1156}
1157
1158sub format_email {
1159    my ($name, $address, $usename) = @_;
1160
1161    my $formatted_email;
1162
1163    $name =~ s/^\s+|\s+$//g;
1164    $name =~ s/^\"|\"$//g;
1165    $address =~ s/^\s+|\s+$//g;
1166
1167    if ($name =~ /[^\w \-]/i) {          ##has "must quote" chars
1168	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1169	$name = "\"$name\"";
1170    }
1171
1172    if ($usename) {
1173	if ("$name" eq "") {
1174	    $formatted_email = "$address";
1175	} else {
1176	    $formatted_email = "$name <$address>";
1177	}
1178    } else {
1179	$formatted_email = $address;
1180    }
1181
1182    return $formatted_email;
1183}
1184
1185sub find_first_section {
1186    my $index = 0;
1187
1188    while ($index < @typevalue) {
1189	my $tv = $typevalue[$index];
1190	if (($tv =~ m/^([A-Z]):\s*(.*)/)) {
1191	    last;
1192	}
1193	$index++;
1194    }
1195
1196    return $index;
1197}
1198
1199sub find_starting_index {
1200    my ($index) = @_;
1201
1202    while ($index > 0) {
1203	my $tv = $typevalue[$index];
1204	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1205	    last;
1206	}
1207	$index--;
1208    }
1209
1210    return $index;
1211}
1212
1213sub find_ending_index {
1214    my ($index) = @_;
1215
1216    while ($index < @typevalue) {
1217	my $tv = $typevalue[$index];
1218	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1219	    last;
1220	}
1221	$index++;
1222    }
1223
1224    return $index;
1225}
1226
1227sub get_subsystem_name {
1228    my ($index) = @_;
1229
1230    my $start = find_starting_index($index);
1231
1232    my $subsystem = $typevalue[$start];
1233    if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
1234	$subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
1235	$subsystem =~ s/\s*$//;
1236	$subsystem = $subsystem . "...";
1237    }
1238    return $subsystem;
1239}
1240
1241sub get_maintainer_role {
1242    my ($index) = @_;
1243
1244    my $i;
1245    my $start = find_starting_index($index);
1246    my $end = find_ending_index($index);
1247
1248    my $role = "unknown";
1249    my $subsystem = get_subsystem_name($index);
1250
1251    for ($i = $start + 1; $i < $end; $i++) {
1252	my $tv = $typevalue[$i];
1253	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1254	    my $ptype = $1;
1255	    my $pvalue = $2;
1256	    if ($ptype eq "S") {
1257		$role = $pvalue;
1258	    }
1259	}
1260    }
1261
1262    $role = lc($role);
1263    if      ($role eq "supported") {
1264	$role = "supporter";
1265    } elsif ($role eq "maintained") {
1266	$role = "maintainer";
1267    } elsif ($role eq "odd fixes") {
1268	$role = "odd fixer";
1269    } elsif ($role eq "orphan") {
1270	$role = "orphan minder";
1271    } elsif ($role eq "obsolete") {
1272	$role = "obsolete minder";
1273    } elsif ($role eq "buried alive in reporters") {
1274	$role = "chief penguin";
1275    }
1276
1277    return $role . ":" . $subsystem;
1278}
1279
1280sub get_list_role {
1281    my ($index) = @_;
1282
1283    my $subsystem = get_subsystem_name($index);
1284
1285    if ($subsystem eq "THE REST") {
1286	$subsystem = "";
1287    }
1288
1289    return $subsystem;
1290}
1291
1292sub add_categories {
1293    my ($index) = @_;
1294
1295    my $i;
1296    my $start = find_starting_index($index);
1297    my $end = find_ending_index($index);
1298
1299    push(@subsystem, $typevalue[$start]);
1300
1301    for ($i = $start + 1; $i < $end; $i++) {
1302	my $tv = $typevalue[$i];
1303	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1304	    my $ptype = $1;
1305	    my $pvalue = $2;
1306	    if ($ptype eq "L") {
1307		my $list_address = $pvalue;
1308		my $list_additional = "";
1309		my $list_role = get_list_role($i);
1310
1311		if ($list_role ne "") {
1312		    $list_role = ":" . $list_role;
1313		}
1314		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1315		    $list_address = $1;
1316		    $list_additional = $2;
1317		}
1318		if ($list_additional =~ m/subscribers-only/) {
1319		    if ($email_subscriber_list) {
1320			if (!$hash_list_to{lc($list_address)}) {
1321			    $hash_list_to{lc($list_address)} = 1;
1322			    push(@list_to, [$list_address,
1323					    "subscriber list${list_role}"]);
1324			}
1325		    }
1326		} else {
1327		    if ($email_list) {
1328			if (!$hash_list_to{lc($list_address)}) {
1329			    if ($list_additional =~ m/moderated/) {
1330				if ($email_moderated_list) {
1331				    $hash_list_to{lc($list_address)} = 1;
1332				    push(@list_to, [$list_address,
1333						    "moderated list${list_role}"]);
1334				}
1335			    } else {
1336				$hash_list_to{lc($list_address)} = 1;
1337				push(@list_to, [$list_address,
1338						"open list${list_role}"]);
1339			    }
1340			}
1341		    }
1342		}
1343	    } elsif ($ptype eq "M") {
1344		if ($email_maintainer) {
1345		    my $role = get_maintainer_role($i);
1346		    push_email_addresses($pvalue, $role);
1347		}
1348	    } elsif ($ptype eq "R") {
1349		if ($email_reviewer) {
1350		    my $subsystem = get_subsystem_name($i);
1351		    push_email_addresses($pvalue, "reviewer:$subsystem");
1352		}
1353	    } elsif ($ptype eq "T") {
1354		push(@scm, $pvalue);
1355	    } elsif ($ptype eq "W") {
1356		push(@web, $pvalue);
1357	    } elsif ($ptype eq "S") {
1358		push(@status, $pvalue);
1359	    }
1360	}
1361    }
1362}
1363
1364sub email_inuse {
1365    my ($name, $address) = @_;
1366
1367    return 1 if (($name eq "") && ($address eq ""));
1368    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1369    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1370
1371    return 0;
1372}
1373
1374sub push_email_address {
1375    my ($line, $role) = @_;
1376
1377    my ($name, $address) = parse_email($line);
1378
1379    if ($address eq "") {
1380	return 0;
1381    }
1382
1383    if (!$email_remove_duplicates) {
1384	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1385    } elsif (!email_inuse($name, $address)) {
1386	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1387	$email_hash_name{lc($name)}++ if ($name ne "");
1388	$email_hash_address{lc($address)}++;
1389    }
1390
1391    return 1;
1392}
1393
1394sub push_email_addresses {
1395    my ($address, $role) = @_;
1396
1397    my @address_list = ();
1398
1399    if (rfc822_valid($address)) {
1400	push_email_address($address, $role);
1401    } elsif (@address_list = rfc822_validlist($address)) {
1402	my $array_count = shift(@address_list);
1403	while (my $entry = shift(@address_list)) {
1404	    push_email_address($entry, $role);
1405	}
1406    } else {
1407	if (!push_email_address($address, $role)) {
1408	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1409	}
1410    }
1411}
1412
1413sub add_role {
1414    my ($line, $role) = @_;
1415
1416    my ($name, $address) = parse_email($line);
1417    my $email = format_email($name, $address, $email_usename);
1418
1419    foreach my $entry (@email_to) {
1420	if ($email_remove_duplicates) {
1421	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1422	    if (($name eq $entry_name || $address eq $entry_address)
1423		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1424	    ) {
1425		if ($entry->[1] eq "") {
1426		    $entry->[1] = "$role";
1427		} else {
1428		    $entry->[1] = "$entry->[1],$role";
1429		}
1430	    }
1431	} else {
1432	    if ($email eq $entry->[0]
1433		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1434	    ) {
1435		if ($entry->[1] eq "") {
1436		    $entry->[1] = "$role";
1437		} else {
1438		    $entry->[1] = "$entry->[1],$role";
1439		}
1440	    }
1441	}
1442    }
1443}
1444
1445sub which {
1446    my ($bin) = @_;
1447
1448    foreach my $path (split(/:/, $ENV{PATH})) {
1449	if (-e "$path/$bin") {
1450	    return "$path/$bin";
1451	}
1452    }
1453
1454    return "";
1455}
1456
1457sub which_conf {
1458    my ($conf) = @_;
1459
1460    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1461	if (-e "$path/$conf") {
1462	    return "$path/$conf";
1463	}
1464    }
1465
1466    return "";
1467}
1468
1469sub mailmap_email {
1470    my ($line) = @_;
1471
1472    my ($name, $address) = parse_email($line);
1473    my $email = format_email($name, $address, 1);
1474    my $real_name = $name;
1475    my $real_address = $address;
1476
1477    if (exists $mailmap->{names}->{$email} ||
1478	exists $mailmap->{addresses}->{$email}) {
1479	if (exists $mailmap->{names}->{$email}) {
1480	    $real_name = $mailmap->{names}->{$email};
1481	}
1482	if (exists $mailmap->{addresses}->{$email}) {
1483	    $real_address = $mailmap->{addresses}->{$email};
1484	}
1485    } else {
1486	if (exists $mailmap->{names}->{$address}) {
1487	    $real_name = $mailmap->{names}->{$address};
1488	}
1489	if (exists $mailmap->{addresses}->{$address}) {
1490	    $real_address = $mailmap->{addresses}->{$address};
1491	}
1492    }
1493    return format_email($real_name, $real_address, 1);
1494}
1495
1496sub mailmap {
1497    my (@addresses) = @_;
1498
1499    my @mapped_emails = ();
1500    foreach my $line (@addresses) {
1501	push(@mapped_emails, mailmap_email($line));
1502    }
1503    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1504    return @mapped_emails;
1505}
1506
1507sub merge_by_realname {
1508    my %address_map;
1509    my (@emails) = @_;
1510
1511    foreach my $email (@emails) {
1512	my ($name, $address) = parse_email($email);
1513	if (exists $address_map{$name}) {
1514	    $address = $address_map{$name};
1515	    $email = format_email($name, $address, 1);
1516	} else {
1517	    $address_map{$name} = $address;
1518	}
1519    }
1520}
1521
1522sub git_execute_cmd {
1523    my ($cmd) = @_;
1524    my @lines = ();
1525
1526    my $output = `$cmd`;
1527    $output =~ s/^\s*//gm;
1528    @lines = split("\n", $output);
1529
1530    return @lines;
1531}
1532
1533sub hg_execute_cmd {
1534    my ($cmd) = @_;
1535    my @lines = ();
1536
1537    my $output = `$cmd`;
1538    @lines = split("\n", $output);
1539
1540    return @lines;
1541}
1542
1543sub extract_formatted_signatures {
1544    my (@signature_lines) = @_;
1545
1546    my @type = @signature_lines;
1547
1548    s/\s*(.*):.*/$1/ for (@type);
1549
1550    # cut -f2- -d":"
1551    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1552
1553## Reformat email addresses (with names) to avoid badly written signatures
1554
1555    foreach my $signer (@signature_lines) {
1556	$signer = deduplicate_email($signer);
1557    }
1558
1559    return (\@type, \@signature_lines);
1560}
1561
1562sub vcs_find_signers {
1563    my ($cmd, $file) = @_;
1564    my $commits;
1565    my @lines = ();
1566    my @signatures = ();
1567    my @authors = ();
1568    my @stats = ();
1569
1570    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1571
1572    my $pattern = $VCS_cmds{"commit_pattern"};
1573    my $author_pattern = $VCS_cmds{"author_pattern"};
1574    my $stat_pattern = $VCS_cmds{"stat_pattern"};
1575
1576    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern
1577
1578    $commits = grep(/$pattern/, @lines);	# of commits
1579
1580    @authors = grep(/$author_pattern/, @lines);
1581    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1582    @stats = grep(/$stat_pattern/, @lines);
1583
1584#    print("stats: <@stats>\n");
1585
1586    return (0, \@signatures, \@authors, \@stats) if !@signatures;
1587
1588    save_commits_by_author(@lines) if ($interactive);
1589    save_commits_by_signer(@lines) if ($interactive);
1590
1591    if (!$email_git_penguin_chiefs) {
1592	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1593    }
1594
1595    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
1596    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1597
1598    return ($commits, $signers_ref, $authors_ref, \@stats);
1599}
1600
1601sub vcs_find_author {
1602    my ($cmd) = @_;
1603    my @lines = ();
1604
1605    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1606
1607    if (!$email_git_penguin_chiefs) {
1608	@lines = grep(!/${penguin_chiefs}/i, @lines);
1609    }
1610
1611    return @lines if !@lines;
1612
1613    my @authors = ();
1614    foreach my $line (@lines) {
1615	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1616	    my $author = $1;
1617	    my ($name, $address) = parse_email($author);
1618	    $author = format_email($name, $address, 1);
1619	    push(@authors, $author);
1620	}
1621    }
1622
1623    save_commits_by_author(@lines) if ($interactive);
1624    save_commits_by_signer(@lines) if ($interactive);
1625
1626    return @authors;
1627}
1628
1629sub vcs_save_commits {
1630    my ($cmd) = @_;
1631    my @lines = ();
1632    my @commits = ();
1633
1634    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1635
1636    foreach my $line (@lines) {
1637	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1638	    push(@commits, $1);
1639	}
1640    }
1641
1642    return @commits;
1643}
1644
1645sub vcs_blame {
1646    my ($file) = @_;
1647    my $cmd;
1648    my @commits = ();
1649
1650    return @commits if (!(-f $file));
1651
1652    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1653	my @all_commits = ();
1654
1655	$cmd = $VCS_cmds{"blame_file_cmd"};
1656	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1657	@all_commits = vcs_save_commits($cmd);
1658
1659	foreach my $file_range_diff (@range) {
1660	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1661	    my $diff_file = $1;
1662	    my $diff_start = $2;
1663	    my $diff_length = $3;
1664	    next if ("$file" ne "$diff_file");
1665	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1666		push(@commits, $all_commits[$i]);
1667	    }
1668	}
1669    } elsif (@range) {
1670	foreach my $file_range_diff (@range) {
1671	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1672	    my $diff_file = $1;
1673	    my $diff_start = $2;
1674	    my $diff_length = $3;
1675	    next if ("$file" ne "$diff_file");
1676	    $cmd = $VCS_cmds{"blame_range_cmd"};
1677	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1678	    push(@commits, vcs_save_commits($cmd));
1679	}
1680    } else {
1681	$cmd = $VCS_cmds{"blame_file_cmd"};
1682	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1683	@commits = vcs_save_commits($cmd);
1684    }
1685
1686    foreach my $commit (@commits) {
1687	$commit =~ s/^\^//g;
1688    }
1689
1690    return @commits;
1691}
1692
1693my $printed_novcs = 0;
1694sub vcs_exists {
1695    %VCS_cmds = %VCS_cmds_git;
1696    return 1 if eval $VCS_cmds{"available"};
1697    %VCS_cmds = %VCS_cmds_hg;
1698    return 2 if eval $VCS_cmds{"available"};
1699    %VCS_cmds = ();
1700    if (!$printed_novcs) {
1701	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1702	warn("Using a git repository produces better results.\n");
1703	warn("Try Linus Torvalds' latest git repository using:\n");
1704	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
1705	$printed_novcs = 1;
1706    }
1707    return 0;
1708}
1709
1710sub vcs_is_git {
1711    vcs_exists();
1712    return $vcs_used == 1;
1713}
1714
1715sub vcs_is_hg {
1716    return $vcs_used == 2;
1717}
1718
1719sub vcs_add_commit_signers {
1720    return if (!vcs_exists());
1721
1722    my ($commit, $desc) = @_;
1723    my $commit_count = 0;
1724    my $commit_authors_ref;
1725    my $commit_signers_ref;
1726    my $stats_ref;
1727    my @commit_authors = ();
1728    my @commit_signers = ();
1729    my $cmd;
1730
1731    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
1732    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1733
1734    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, "");
1735    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
1736    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
1737
1738    foreach my $signer (@commit_signers) {
1739	$signer = deduplicate_email($signer);
1740    }
1741
1742    vcs_assign($desc, 1, @commit_signers);
1743}
1744
1745sub interactive_get_maintainers {
1746    my ($list_ref) = @_;
1747    my @list = @$list_ref;
1748
1749    vcs_exists();
1750
1751    my %selected;
1752    my %authored;
1753    my %signed;
1754    my $count = 0;
1755    my $maintained = 0;
1756    foreach my $entry (@list) {
1757	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1758	$selected{$count} = 1;
1759	$authored{$count} = 0;
1760	$signed{$count} = 0;
1761	$count++;
1762    }
1763
1764    #menu loop
1765    my $done = 0;
1766    my $print_options = 0;
1767    my $redraw = 1;
1768    while (!$done) {
1769	$count = 0;
1770	if ($redraw) {
1771	    printf STDERR "\n%1s %2s %-65s",
1772			  "*", "#", "email/list and role:stats";
1773	    if ($email_git ||
1774		($email_git_fallback && !$maintained) ||
1775		$email_git_blame) {
1776		print STDERR "auth sign";
1777	    }
1778	    print STDERR "\n";
1779	    foreach my $entry (@list) {
1780		my $email = $entry->[0];
1781		my $role = $entry->[1];
1782		my $sel = "";
1783		$sel = "*" if ($selected{$count});
1784		my $commit_author = $commit_author_hash{$email};
1785		my $commit_signer = $commit_signer_hash{$email};
1786		my $authored = 0;
1787		my $signed = 0;
1788		$authored++ for (@{$commit_author});
1789		$signed++ for (@{$commit_signer});
1790		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1791		printf STDERR "%4d %4d", $authored, $signed
1792		    if ($authored > 0 || $signed > 0);
1793		printf STDERR "\n     %s\n", $role;
1794		if ($authored{$count}) {
1795		    my $commit_author = $commit_author_hash{$email};
1796		    foreach my $ref (@{$commit_author}) {
1797			print STDERR "     Author: @{$ref}[1]\n";
1798		    }
1799		}
1800		if ($signed{$count}) {
1801		    my $commit_signer = $commit_signer_hash{$email};
1802		    foreach my $ref (@{$commit_signer}) {
1803			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1804		    }
1805		}
1806
1807		$count++;
1808	    }
1809	}
1810	my $date_ref = \$email_git_since;
1811	$date_ref = \$email_hg_since if (vcs_is_hg());
1812	if ($print_options) {
1813	    $print_options = 0;
1814	    if (vcs_exists()) {
1815		print STDERR <<EOT
1816
1817Version Control options:
1818g  use git history      [$email_git]
1819gf use git-fallback     [$email_git_fallback]
1820b  use git blame        [$email_git_blame]
1821bs use blame signatures [$email_git_blame_signatures]
1822c# minimum commits      [$email_git_min_signatures]
1823%# min percent          [$email_git_min_percent]
1824d# history to use       [$$date_ref]
1825x# max maintainers      [$email_git_max_maintainers]
1826t  all signature types  [$email_git_all_signature_types]
1827m  use .mailmap         [$email_use_mailmap]
1828EOT
1829	    }
1830	    print STDERR <<EOT
1831
1832Additional options:
18330  toggle all
1834tm toggle maintainers
1835tg toggle git entries
1836tl toggle open list entries
1837ts toggle subscriber list entries
1838f  emails in file       [$file_emails]
1839k  keywords in file     [$keywords]
1840r  remove duplicates    [$email_remove_duplicates]
1841p# pattern match depth  [$pattern_depth]
1842EOT
1843	}
1844	print STDERR
1845"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1846
1847	my $input = <STDIN>;
1848	chomp($input);
1849
1850	$redraw = 1;
1851	my $rerun = 0;
1852	my @wish = split(/[, ]+/, $input);
1853	foreach my $nr (@wish) {
1854	    $nr = lc($nr);
1855	    my $sel = substr($nr, 0, 1);
1856	    my $str = substr($nr, 1);
1857	    my $val = 0;
1858	    $val = $1 if $str =~ /^(\d+)$/;
1859
1860	    if ($sel eq "y") {
1861		$interactive = 0;
1862		$done = 1;
1863		$output_rolestats = 0;
1864		$output_roles = 0;
1865		last;
1866	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1867		$selected{$nr - 1} = !$selected{$nr - 1};
1868	    } elsif ($sel eq "*" || $sel eq '^') {
1869		my $toggle = 0;
1870		$toggle = 1 if ($sel eq '*');
1871		for (my $i = 0; $i < $count; $i++) {
1872		    $selected{$i} = $toggle;
1873		}
1874	    } elsif ($sel eq "0") {
1875		for (my $i = 0; $i < $count; $i++) {
1876		    $selected{$i} = !$selected{$i};
1877		}
1878	    } elsif ($sel eq "t") {
1879		if (lc($str) eq "m") {
1880		    for (my $i = 0; $i < $count; $i++) {
1881			$selected{$i} = !$selected{$i}
1882			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1883		    }
1884		} elsif (lc($str) eq "g") {
1885		    for (my $i = 0; $i < $count; $i++) {
1886			$selected{$i} = !$selected{$i}
1887			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1888		    }
1889		} elsif (lc($str) eq "l") {
1890		    for (my $i = 0; $i < $count; $i++) {
1891			$selected{$i} = !$selected{$i}
1892			    if ($list[$i]->[1] =~ /^(open list)/i);
1893		    }
1894		} elsif (lc($str) eq "s") {
1895		    for (my $i = 0; $i < $count; $i++) {
1896			$selected{$i} = !$selected{$i}
1897			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1898		    }
1899		}
1900	    } elsif ($sel eq "a") {
1901		if ($val > 0 && $val <= $count) {
1902		    $authored{$val - 1} = !$authored{$val - 1};
1903		} elsif ($str eq '*' || $str eq '^') {
1904		    my $toggle = 0;
1905		    $toggle = 1 if ($str eq '*');
1906		    for (my $i = 0; $i < $count; $i++) {
1907			$authored{$i} = $toggle;
1908		    }
1909		}
1910	    } elsif ($sel eq "s") {
1911		if ($val > 0 && $val <= $count) {
1912		    $signed{$val - 1} = !$signed{$val - 1};
1913		} elsif ($str eq '*' || $str eq '^') {
1914		    my $toggle = 0;
1915		    $toggle = 1 if ($str eq '*');
1916		    for (my $i = 0; $i < $count; $i++) {
1917			$signed{$i} = $toggle;
1918		    }
1919		}
1920	    } elsif ($sel eq "o") {
1921		$print_options = 1;
1922		$redraw = 1;
1923	    } elsif ($sel eq "g") {
1924		if ($str eq "f") {
1925		    bool_invert(\$email_git_fallback);
1926		} else {
1927		    bool_invert(\$email_git);
1928		}
1929		$rerun = 1;
1930	    } elsif ($sel eq "b") {
1931		if ($str eq "s") {
1932		    bool_invert(\$email_git_blame_signatures);
1933		} else {
1934		    bool_invert(\$email_git_blame);
1935		}
1936		$rerun = 1;
1937	    } elsif ($sel eq "c") {
1938		if ($val > 0) {
1939		    $email_git_min_signatures = $val;
1940		    $rerun = 1;
1941		}
1942	    } elsif ($sel eq "x") {
1943		if ($val > 0) {
1944		    $email_git_max_maintainers = $val;
1945		    $rerun = 1;
1946		}
1947	    } elsif ($sel eq "%") {
1948		if ($str ne "" && $val >= 0) {
1949		    $email_git_min_percent = $val;
1950		    $rerun = 1;
1951		}
1952	    } elsif ($sel eq "d") {
1953		if (vcs_is_git()) {
1954		    $email_git_since = $str;
1955		} elsif (vcs_is_hg()) {
1956		    $email_hg_since = $str;
1957		}
1958		$rerun = 1;
1959	    } elsif ($sel eq "t") {
1960		bool_invert(\$email_git_all_signature_types);
1961		$rerun = 1;
1962	    } elsif ($sel eq "f") {
1963		bool_invert(\$file_emails);
1964		$rerun = 1;
1965	    } elsif ($sel eq "r") {
1966		bool_invert(\$email_remove_duplicates);
1967		$rerun = 1;
1968	    } elsif ($sel eq "m") {
1969		bool_invert(\$email_use_mailmap);
1970		read_mailmap();
1971		$rerun = 1;
1972	    } elsif ($sel eq "k") {
1973		bool_invert(\$keywords);
1974		$rerun = 1;
1975	    } elsif ($sel eq "p") {
1976		if ($str ne "" && $val >= 0) {
1977		    $pattern_depth = $val;
1978		    $rerun = 1;
1979		}
1980	    } elsif ($sel eq "h" || $sel eq "?") {
1981		print STDERR <<EOT
1982
1983Interactive mode allows you to select the various maintainers, submitters,
1984commit signers and mailing lists that could be CC'd on a patch.
1985
1986Any *'d entry is selected.
1987
1988If you have git or hg installed, you can choose to summarize the commit
1989history of files in the patch.  Also, each line of the current file can
1990be matched to its commit author and that commits signers with blame.
1991
1992Various knobs exist to control the length of time for active commit
1993tracking, the maximum number of commit authors and signers to add,
1994and such.
1995
1996Enter selections at the prompt until you are satisfied that the selected
1997maintainers are appropriate.  You may enter multiple selections separated
1998by either commas or spaces.
1999
2000EOT
2001	    } else {
2002		print STDERR "invalid option: '$nr'\n";
2003		$redraw = 0;
2004	    }
2005	}
2006	if ($rerun) {
2007	    print STDERR "git-blame can be very slow, please have patience..."
2008		if ($email_git_blame);
2009	    goto &get_maintainers;
2010	}
2011    }
2012
2013    #drop not selected entries
2014    $count = 0;
2015    my @new_emailto = ();
2016    foreach my $entry (@list) {
2017	if ($selected{$count}) {
2018	    push(@new_emailto, $list[$count]);
2019	}
2020	$count++;
2021    }
2022    return @new_emailto;
2023}
2024
2025sub bool_invert {
2026    my ($bool_ref) = @_;
2027
2028    if ($$bool_ref) {
2029	$$bool_ref = 0;
2030    } else {
2031	$$bool_ref = 1;
2032    }
2033}
2034
2035sub deduplicate_email {
2036    my ($email) = @_;
2037
2038    my $matched = 0;
2039    my ($name, $address) = parse_email($email);
2040    $email = format_email($name, $address, 1);
2041    $email = mailmap_email($email);
2042
2043    return $email if (!$email_remove_duplicates);
2044
2045    ($name, $address) = parse_email($email);
2046
2047    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
2048	$name = $deduplicate_name_hash{lc($name)}->[0];
2049	$address = $deduplicate_name_hash{lc($name)}->[1];
2050	$matched = 1;
2051    } elsif ($deduplicate_address_hash{lc($address)}) {
2052	$name = $deduplicate_address_hash{lc($address)}->[0];
2053	$address = $deduplicate_address_hash{lc($address)}->[1];
2054	$matched = 1;
2055    }
2056    if (!$matched) {
2057	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
2058	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
2059    }
2060    $email = format_email($name, $address, 1);
2061    $email = mailmap_email($email);
2062    return $email;
2063}
2064
2065sub save_commits_by_author {
2066    my (@lines) = @_;
2067
2068    my @authors = ();
2069    my @commits = ();
2070    my @subjects = ();
2071
2072    foreach my $line (@lines) {
2073	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2074	    my $author = $1;
2075	    $author = deduplicate_email($author);
2076	    push(@authors, $author);
2077	}
2078	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2079	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2080    }
2081
2082    for (my $i = 0; $i < @authors; $i++) {
2083	my $exists = 0;
2084	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
2085	    if (@{$ref}[0] eq $commits[$i] &&
2086		@{$ref}[1] eq $subjects[$i]) {
2087		$exists = 1;
2088		last;
2089	    }
2090	}
2091	if (!$exists) {
2092	    push(@{$commit_author_hash{$authors[$i]}},
2093		 [ ($commits[$i], $subjects[$i]) ]);
2094	}
2095    }
2096}
2097
2098sub save_commits_by_signer {
2099    my (@lines) = @_;
2100
2101    my $commit = "";
2102    my $subject = "";
2103
2104    foreach my $line (@lines) {
2105	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2106	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2107	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
2108	    my @signatures = ($line);
2109	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
2110	    my @types = @$types_ref;
2111	    my @signers = @$signers_ref;
2112
2113	    my $type = $types[0];
2114	    my $signer = $signers[0];
2115
2116	    $signer = deduplicate_email($signer);
2117
2118	    my $exists = 0;
2119	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
2120		if (@{$ref}[0] eq $commit &&
2121		    @{$ref}[1] eq $subject &&
2122		    @{$ref}[2] eq $type) {
2123		    $exists = 1;
2124		    last;
2125		}
2126	    }
2127	    if (!$exists) {
2128		push(@{$commit_signer_hash{$signer}},
2129		     [ ($commit, $subject, $type) ]);
2130	    }
2131	}
2132    }
2133}
2134
2135sub vcs_assign {
2136    my ($role, $divisor, @lines) = @_;
2137
2138    my %hash;
2139    my $count = 0;
2140
2141    return if (@lines <= 0);
2142
2143    if ($divisor <= 0) {
2144	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
2145	$divisor = 1;
2146    }
2147
2148    @lines = mailmap(@lines);
2149
2150    return if (@lines <= 0);
2151
2152    @lines = sort(@lines);
2153
2154    # uniq -c
2155    $hash{$_}++ for @lines;
2156
2157    # sort -rn
2158    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
2159	my $sign_offs = $hash{$line};
2160	my $percent = $sign_offs * 100 / $divisor;
2161
2162	$percent = 100 if ($percent > 100);
2163	next if (ignore_email_address($line));
2164	$count++;
2165	last if ($sign_offs < $email_git_min_signatures ||
2166		 $count > $email_git_max_maintainers ||
2167		 $percent < $email_git_min_percent);
2168	push_email_address($line, '');
2169	if ($output_rolestats) {
2170	    my $fmt_percent = sprintf("%.0f", $percent);
2171	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
2172	} else {
2173	    add_role($line, $role);
2174	}
2175    }
2176}
2177
2178sub vcs_file_signoffs {
2179    my ($file) = @_;
2180
2181    my $authors_ref;
2182    my $signers_ref;
2183    my $stats_ref;
2184    my @authors = ();
2185    my @signers = ();
2186    my @stats = ();
2187    my $commits;
2188
2189    $vcs_used = vcs_exists();
2190    return if (!$vcs_used);
2191
2192    my $cmd = $VCS_cmds{"find_signers_cmd"};
2193    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2194
2195    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2196
2197    @signers = @{$signers_ref} if defined $signers_ref;
2198    @authors = @{$authors_ref} if defined $authors_ref;
2199    @stats = @{$stats_ref} if defined $stats_ref;
2200
2201#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
2202
2203    foreach my $signer (@signers) {
2204	$signer = deduplicate_email($signer);
2205    }
2206
2207    vcs_assign("commit_signer", $commits, @signers);
2208    vcs_assign("authored", $commits, @authors);
2209    if ($#authors == $#stats) {
2210	my $stat_pattern = $VCS_cmds{"stat_pattern"};
2211	$stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern
2212
2213	my $added = 0;
2214	my $deleted = 0;
2215	for (my $i = 0; $i <= $#stats; $i++) {
2216	    if ($stats[$i] =~ /$stat_pattern/) {
2217		$added += $1;
2218		$deleted += $2;
2219	    }
2220	}
2221	my @tmp_authors = uniq(@authors);
2222	foreach my $author (@tmp_authors) {
2223	    $author = deduplicate_email($author);
2224	}
2225	@tmp_authors = uniq(@tmp_authors);
2226	my @list_added = ();
2227	my @list_deleted = ();
2228	foreach my $author (@tmp_authors) {
2229	    my $auth_added = 0;
2230	    my $auth_deleted = 0;
2231	    for (my $i = 0; $i <= $#stats; $i++) {
2232		if ($author eq deduplicate_email($authors[$i]) &&
2233		    $stats[$i] =~ /$stat_pattern/) {
2234		    $auth_added += $1;
2235		    $auth_deleted += $2;
2236		}
2237	    }
2238	    for (my $i = 0; $i < $auth_added; $i++) {
2239		push(@list_added, $author);
2240	    }
2241	    for (my $i = 0; $i < $auth_deleted; $i++) {
2242		push(@list_deleted, $author);
2243	    }
2244	}
2245	vcs_assign("added_lines", $added, @list_added);
2246	vcs_assign("removed_lines", $deleted, @list_deleted);
2247    }
2248}
2249
2250sub vcs_file_blame {
2251    my ($file) = @_;
2252
2253    my @signers = ();
2254    my @all_commits = ();
2255    my @commits = ();
2256    my $total_commits;
2257    my $total_lines;
2258
2259    $vcs_used = vcs_exists();
2260    return if (!$vcs_used);
2261
2262    @all_commits = vcs_blame($file);
2263    @commits = uniq(@all_commits);
2264    $total_commits = @commits;
2265    $total_lines = @all_commits;
2266
2267    if ($email_git_blame_signatures) {
2268	if (vcs_is_hg()) {
2269	    my $commit_count;
2270	    my $commit_authors_ref;
2271	    my $commit_signers_ref;
2272	    my $stats_ref;
2273	    my @commit_authors = ();
2274	    my @commit_signers = ();
2275	    my $commit = join(" -r ", @commits);
2276	    my $cmd;
2277
2278	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
2279	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2280
2281	    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2282	    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2283	    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2284
2285	    push(@signers, @commit_signers);
2286	} else {
2287	    foreach my $commit (@commits) {
2288		my $commit_count;
2289		my $commit_authors_ref;
2290		my $commit_signers_ref;
2291		my $stats_ref;
2292		my @commit_authors = ();
2293		my @commit_signers = ();
2294		my $cmd;
2295
2296		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
2297		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2298
2299		($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2300		@commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2301		@commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2302
2303		push(@signers, @commit_signers);
2304	    }
2305	}
2306    }
2307
2308    if ($from_filename) {
2309	if ($output_rolestats) {
2310	    my @blame_signers;
2311	    if (vcs_is_hg()) {{		# Double brace for last exit
2312		my $commit_count;
2313		my @commit_signers = ();
2314		@commits = uniq(@commits);
2315		@commits = sort(@commits);
2316		my $commit = join(" -r ", @commits);
2317		my $cmd;
2318
2319		$cmd = $VCS_cmds{"find_commit_author_cmd"};
2320		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2321
2322		my @lines = ();
2323
2324		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
2325
2326		if (!$email_git_penguin_chiefs) {
2327		    @lines = grep(!/${penguin_chiefs}/i, @lines);
2328		}
2329
2330		last if !@lines;
2331
2332		my @authors = ();
2333		foreach my $line (@lines) {
2334		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2335			my $author = $1;
2336			$author = deduplicate_email($author);
2337			push(@authors, $author);
2338		    }
2339		}
2340
2341		save_commits_by_author(@lines) if ($interactive);
2342		save_commits_by_signer(@lines) if ($interactive);
2343
2344		push(@signers, @authors);
2345	    }}
2346	    else {
2347		foreach my $commit (@commits) {
2348		    my $i;
2349		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2350		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2351		    my @author = vcs_find_author($cmd);
2352		    next if !@author;
2353
2354		    my $formatted_author = deduplicate_email($author[0]);
2355
2356		    my $count = grep(/$commit/, @all_commits);
2357		    for ($i = 0; $i < $count ; $i++) {
2358			push(@blame_signers, $formatted_author);
2359		    }
2360		}
2361	    }
2362	    if (@blame_signers) {
2363		vcs_assign("authored lines", $total_lines, @blame_signers);
2364	    }
2365	}
2366	foreach my $signer (@signers) {
2367	    $signer = deduplicate_email($signer);
2368	}
2369	vcs_assign("commits", $total_commits, @signers);
2370    } else {
2371	foreach my $signer (@signers) {
2372	    $signer = deduplicate_email($signer);
2373	}
2374	vcs_assign("modified commits", $total_commits, @signers);
2375    }
2376}
2377
2378sub vcs_file_exists {
2379    my ($file) = @_;
2380
2381    my $exists;
2382
2383    my $vcs_used = vcs_exists();
2384    return 0 if (!$vcs_used);
2385
2386    my $cmd = $VCS_cmds{"file_exists_cmd"};
2387    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2388    $cmd .= " 2>&1";
2389    $exists = &{$VCS_cmds{"execute_cmd"}}($cmd);
2390
2391    return 0 if ($? != 0);
2392
2393    return $exists;
2394}
2395
2396sub vcs_list_files {
2397    my ($file) = @_;
2398
2399    my @lsfiles = ();
2400
2401    my $vcs_used = vcs_exists();
2402    return 0 if (!$vcs_used);
2403
2404    my $cmd = $VCS_cmds{"list_files_cmd"};
2405    $cmd =~ s/(\$\w+)/$1/eeg;   # interpolate $cmd
2406    @lsfiles = &{$VCS_cmds{"execute_cmd"}}($cmd);
2407
2408    return () if ($? != 0);
2409
2410    return @lsfiles;
2411}
2412
2413sub uniq {
2414    my (@parms) = @_;
2415
2416    my %saw;
2417    @parms = grep(!$saw{$_}++, @parms);
2418    return @parms;
2419}
2420
2421sub sort_and_uniq {
2422    my (@parms) = @_;
2423
2424    my %saw;
2425    @parms = sort @parms;
2426    @parms = grep(!$saw{$_}++, @parms);
2427    return @parms;
2428}
2429
2430sub clean_file_emails {
2431    my (@file_emails) = @_;
2432    my @fmt_emails = ();
2433
2434    foreach my $email (@file_emails) {
2435	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2436	my ($name, $address) = parse_email($email);
2437	if ($name eq '"[,\.]"') {
2438	    $name = "";
2439	}
2440
2441	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
2442	if (@nw > 2) {
2443	    my $first = $nw[@nw - 3];
2444	    my $middle = $nw[@nw - 2];
2445	    my $last = $nw[@nw - 1];
2446
2447	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
2448		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2449		(length($middle) == 1 ||
2450		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2451		$name = "$first $middle $last";
2452	    } else {
2453		$name = "$middle $last";
2454	    }
2455	}
2456
2457	if (substr($name, -1) =~ /[,\.]/) {
2458	    $name = substr($name, 0, length($name) - 1);
2459	} elsif (substr($name, -2) =~ /[,\.]"/) {
2460	    $name = substr($name, 0, length($name) - 2) . '"';
2461	}
2462
2463	if (substr($name, 0, 1) =~ /[,\.]/) {
2464	    $name = substr($name, 1, length($name) - 1);
2465	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
2466	    $name = '"' . substr($name, 2, length($name) - 2);
2467	}
2468
2469	my $fmt_email = format_email($name, $address, $email_usename);
2470	push(@fmt_emails, $fmt_email);
2471    }
2472    return @fmt_emails;
2473}
2474
2475sub merge_email {
2476    my @lines;
2477    my %saw;
2478
2479    for (@_) {
2480	my ($address, $role) = @$_;
2481	if (!$saw{$address}) {
2482	    if ($output_roles) {
2483		push(@lines, "$address ($role)");
2484	    } else {
2485		push(@lines, $address);
2486	    }
2487	    $saw{$address} = 1;
2488	}
2489    }
2490
2491    return @lines;
2492}
2493
2494sub output {
2495    my (@parms) = @_;
2496
2497    if ($output_multiline) {
2498	foreach my $line (@parms) {
2499	    print("${line}\n");
2500	}
2501    } else {
2502	print(join($output_separator, @parms));
2503	print("\n");
2504    }
2505}
2506
2507my $rfc822re;
2508
2509sub make_rfc822re {
2510#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2511#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2512#   This regexp will only work on addresses which have had comments stripped
2513#   and replaced with rfc822_lwsp.
2514
2515    my $specials = '()<>@,;:\\\\".\\[\\]';
2516    my $controls = '\\000-\\037\\177';
2517
2518    my $dtext = "[^\\[\\]\\r\\\\]";
2519    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2520
2521    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2522
2523#   Use zero-width assertion to spot the limit of an atom.  A simple
2524#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2525    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2526    my $word = "(?:$atom|$quoted_string)";
2527    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2528
2529    my $sub_domain = "(?:$atom|$domain_literal)";
2530    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2531
2532    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2533
2534    my $phrase = "$word*";
2535    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2536    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2537    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2538
2539    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2540    my $address = "(?:$mailbox|$group)";
2541
2542    return "$rfc822_lwsp*$address";
2543}
2544
2545sub rfc822_strip_comments {
2546    my $s = shift;
2547#   Recursively remove comments, and replace with a single space.  The simpler
2548#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2549#   chars in atoms, for example.
2550
2551    while ($s =~ s/^((?:[^"\\]|\\.)*
2552                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2553                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2554    return $s;
2555}
2556
2557#   valid: returns true if the parameter is an RFC822 valid address
2558#
2559sub rfc822_valid {
2560    my $s = rfc822_strip_comments(shift);
2561
2562    if (!$rfc822re) {
2563        $rfc822re = make_rfc822re();
2564    }
2565
2566    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2567}
2568
2569#   validlist: In scalar context, returns true if the parameter is an RFC822
2570#              valid list of addresses.
2571#
2572#              In list context, returns an empty list on failure (an invalid
2573#              address was found); otherwise a list whose first element is the
2574#              number of addresses found and whose remaining elements are the
2575#              addresses.  This is needed to disambiguate failure (invalid)
2576#              from success with no addresses found, because an empty string is
2577#              a valid list.
2578
2579sub rfc822_validlist {
2580    my $s = rfc822_strip_comments(shift);
2581
2582    if (!$rfc822re) {
2583        $rfc822re = make_rfc822re();
2584    }
2585    # * null list items are valid according to the RFC
2586    # * the '1' business is to aid in distinguishing failure from no results
2587
2588    my @r;
2589    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2590	$s =~ m/^$rfc822_char*$/) {
2591        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2592            push(@r, $1);
2593        }
2594        return wantarray ? (scalar(@r), @r) : 1;
2595    }
2596    return wantarray ? () : 0;
2597}
2598