xref: /openbmc/linux/scripts/get_maintainer.pl (revision ba61bb17)
1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
4# (c) 2007, Joe Perches <joe@perches.com>
5#           created from checkpatch.pl
6#
7# Print selected MAINTAINERS information for
8# the files modified in a patch or for a file
9#
10# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
11#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
12
13use warnings;
14use strict;
15
16my $P = $0;
17my $V = '0.26';
18
19use Getopt::Long qw(:config no_auto_abbrev);
20use Cwd;
21use File::Find;
22
23my $cur_path = fastgetcwd() . '/';
24my $lk_path = "./";
25my $email = 1;
26my $email_usename = 1;
27my $email_maintainer = 1;
28my $email_reviewer = 1;
29my $email_list = 1;
30my $email_subscriber_list = 0;
31my $email_git_penguin_chiefs = 0;
32my $email_git = 0;
33my $email_git_all_signature_types = 0;
34my $email_git_blame = 0;
35my $email_git_blame_signatures = 1;
36my $email_git_fallback = 1;
37my $email_git_min_signatures = 1;
38my $email_git_max_maintainers = 5;
39my $email_git_min_percent = 5;
40my $email_git_since = "1-year-ago";
41my $email_hg_since = "-365";
42my $interactive = 0;
43my $email_remove_duplicates = 1;
44my $email_use_mailmap = 1;
45my $output_multiline = 1;
46my $output_separator = ", ";
47my $output_roles = 0;
48my $output_rolestats = 1;
49my $output_section_maxlen = 50;
50my $scm = 0;
51my $web = 0;
52my $subsystem = 0;
53my $status = 0;
54my $letters = "";
55my $keywords = 1;
56my $sections = 0;
57my $file_emails = 0;
58my $from_filename = 0;
59my $pattern_depth = 0;
60my $self_test = undef;
61my $version = 0;
62my $help = 0;
63my $find_maintainer_files = 0;
64
65my $vcs_used = 0;
66
67my $exit = 0;
68
69my %commit_author_hash;
70my %commit_signer_hash;
71
72my @penguin_chief = ();
73push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
74#Andrew wants in on most everything - 2009/01/14
75#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
76
77my @penguin_chief_names = ();
78foreach my $chief (@penguin_chief) {
79    if ($chief =~ m/^(.*):(.*)/) {
80	my $chief_name = $1;
81	my $chief_addr = $2;
82	push(@penguin_chief_names, $chief_name);
83    }
84}
85my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
86
87# Signature types of people who are either
88# 	a) responsible for the code in question, or
89# 	b) familiar enough with it to give relevant feedback
90my @signature_tags = ();
91push(@signature_tags, "Signed-off-by:");
92push(@signature_tags, "Reviewed-by:");
93push(@signature_tags, "Acked-by:");
94
95my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
96
97# rfc822 email address - preloaded methods go here.
98my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
99my $rfc822_char = '[\\000-\\377]';
100
101# VCS command support: class-like functions and strings
102
103my %VCS_cmds;
104
105my %VCS_cmds_git = (
106    "execute_cmd" => \&git_execute_cmd,
107    "available" => '(which("git") ne "") && (-e ".git")',
108    "find_signers_cmd" =>
109	"git log --no-color --follow --since=\$email_git_since " .
110	    '--numstat --no-merges ' .
111	    '--format="GitCommit: %H%n' .
112		      'GitAuthor: %an <%ae>%n' .
113		      'GitDate: %aD%n' .
114		      'GitSubject: %s%n' .
115		      '%b%n"' .
116	    " -- \$file",
117    "find_commit_signers_cmd" =>
118	"git log --no-color " .
119	    '--numstat ' .
120	    '--format="GitCommit: %H%n' .
121		      'GitAuthor: %an <%ae>%n' .
122		      'GitDate: %aD%n' .
123		      'GitSubject: %s%n' .
124		      '%b%n"' .
125	    " -1 \$commit",
126    "find_commit_author_cmd" =>
127	"git log --no-color " .
128	    '--numstat ' .
129	    '--format="GitCommit: %H%n' .
130		      'GitAuthor: %an <%ae>%n' .
131		      'GitDate: %aD%n' .
132		      'GitSubject: %s%n"' .
133	    " -1 \$commit",
134    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
135    "blame_file_cmd" => "git blame -l \$file",
136    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
137    "blame_commit_pattern" => "^([0-9a-f]+) ",
138    "author_pattern" => "^GitAuthor: (.*)",
139    "subject_pattern" => "^GitSubject: (.*)",
140    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
141    "file_exists_cmd" => "git ls-files \$file",
142    "list_files_cmd" => "git ls-files \$file",
143);
144
145my %VCS_cmds_hg = (
146    "execute_cmd" => \&hg_execute_cmd,
147    "available" => '(which("hg") ne "") && (-d ".hg")',
148    "find_signers_cmd" =>
149	"hg log --date=\$email_hg_since " .
150	    "--template='HgCommit: {node}\\n" .
151	                "HgAuthor: {author}\\n" .
152			"HgSubject: {desc}\\n'" .
153	    " -- \$file",
154    "find_commit_signers_cmd" =>
155	"hg log " .
156	    "--template='HgSubject: {desc}\\n'" .
157	    " -r \$commit",
158    "find_commit_author_cmd" =>
159	"hg log " .
160	    "--template='HgCommit: {node}\\n" .
161		        "HgAuthor: {author}\\n" .
162			"HgSubject: {desc|firstline}\\n'" .
163	    " -r \$commit",
164    "blame_range_cmd" => "",		# not supported
165    "blame_file_cmd" => "hg blame -n \$file",
166    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
167    "blame_commit_pattern" => "^([ 0-9a-f]+):",
168    "author_pattern" => "^HgAuthor: (.*)",
169    "subject_pattern" => "^HgSubject: (.*)",
170    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
171    "file_exists_cmd" => "hg files \$file",
172    "list_files_cmd" => "hg manifest -R \$file",
173);
174
175my $conf = which_conf(".get_maintainer.conf");
176if (-f $conf) {
177    my @conf_args;
178    open(my $conffile, '<', "$conf")
179	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
180
181    while (<$conffile>) {
182	my $line = $_;
183
184	$line =~ s/\s*\n?$//g;
185	$line =~ s/^\s*//g;
186	$line =~ s/\s+/ /g;
187
188	next if ($line =~ m/^\s*#/);
189	next if ($line =~ m/^\s*$/);
190
191	my @words = split(" ", $line);
192	foreach my $word (@words) {
193	    last if ($word =~ m/^#/);
194	    push (@conf_args, $word);
195	}
196    }
197    close($conffile);
198    unshift(@ARGV, @conf_args) if @conf_args;
199}
200
201my @ignore_emails = ();
202my $ignore_file = which_conf(".get_maintainer.ignore");
203if (-f $ignore_file) {
204    open(my $ignore, '<', "$ignore_file")
205	or warn "$P: Can't find a readable .get_maintainer.ignore file $!\n";
206    while (<$ignore>) {
207	my $line = $_;
208
209	$line =~ s/\s*\n?$//;
210	$line =~ s/^\s*//;
211	$line =~ s/\s+$//;
212	$line =~ s/#.*$//;
213
214	next if ($line =~ m/^\s*$/);
215	if (rfc822_valid($line)) {
216	    push(@ignore_emails, $line);
217	}
218    }
219    close($ignore);
220}
221
222if ($#ARGV > 0) {
223    foreach (@ARGV) {
224        if ($_ =~ /^-{1,2}self-test(?:=|$)/) {
225            die "$P: using --self-test does not allow any other option or argument\n";
226        }
227    }
228}
229
230if (!GetOptions(
231		'email!' => \$email,
232		'git!' => \$email_git,
233		'git-all-signature-types!' => \$email_git_all_signature_types,
234		'git-blame!' => \$email_git_blame,
235		'git-blame-signatures!' => \$email_git_blame_signatures,
236		'git-fallback!' => \$email_git_fallback,
237		'git-chief-penguins!' => \$email_git_penguin_chiefs,
238		'git-min-signatures=i' => \$email_git_min_signatures,
239		'git-max-maintainers=i' => \$email_git_max_maintainers,
240		'git-min-percent=i' => \$email_git_min_percent,
241		'git-since=s' => \$email_git_since,
242		'hg-since=s' => \$email_hg_since,
243		'i|interactive!' => \$interactive,
244		'remove-duplicates!' => \$email_remove_duplicates,
245		'mailmap!' => \$email_use_mailmap,
246		'm!' => \$email_maintainer,
247		'r!' => \$email_reviewer,
248		'n!' => \$email_usename,
249		'l!' => \$email_list,
250		's!' => \$email_subscriber_list,
251		'multiline!' => \$output_multiline,
252		'roles!' => \$output_roles,
253		'rolestats!' => \$output_rolestats,
254		'separator=s' => \$output_separator,
255		'subsystem!' => \$subsystem,
256		'status!' => \$status,
257		'scm!' => \$scm,
258		'web!' => \$web,
259		'letters=s' => \$letters,
260		'pattern-depth=i' => \$pattern_depth,
261		'k|keywords!' => \$keywords,
262		'sections!' => \$sections,
263		'fe|file-emails!' => \$file_emails,
264		'f|file' => \$from_filename,
265		'find-maintainer-files' => \$find_maintainer_files,
266		'self-test:s' => \$self_test,
267		'v|version' => \$version,
268		'h|help|usage' => \$help,
269		)) {
270    die "$P: invalid argument - use --help if necessary\n";
271}
272
273if ($help != 0) {
274    usage();
275    exit 0;
276}
277
278if ($version != 0) {
279    print("${P} ${V}\n");
280    exit 0;
281}
282
283if (defined $self_test) {
284    read_all_maintainer_files();
285    self_test();
286    exit 0;
287}
288
289if (-t STDIN && !@ARGV) {
290    # We're talking to a terminal, but have no command line arguments.
291    die "$P: missing patchfile or -f file - use --help if necessary\n";
292}
293
294$output_multiline = 0 if ($output_separator ne ", ");
295$output_rolestats = 1 if ($interactive);
296$output_roles = 1 if ($output_rolestats);
297
298if ($sections || $letters ne "") {
299    $sections = 1;
300    $email = 0;
301    $email_list = 0;
302    $scm = 0;
303    $status = 0;
304    $subsystem = 0;
305    $web = 0;
306    $keywords = 0;
307    $interactive = 0;
308} else {
309    my $selections = $email + $scm + $status + $subsystem + $web;
310    if ($selections == 0) {
311	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
312    }
313}
314
315if ($email &&
316    ($email_maintainer + $email_reviewer +
317     $email_list + $email_subscriber_list +
318     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
319    die "$P: Please select at least 1 email option\n";
320}
321
322if (!top_of_kernel_tree($lk_path)) {
323    die "$P: The current directory does not appear to be "
324	. "a linux kernel source tree.\n";
325}
326
327## Read MAINTAINERS for type/value pairs
328
329my @typevalue = ();
330my %keyword_hash;
331my @mfiles = ();
332my @self_test_info = ();
333
334sub read_maintainer_file {
335    my ($file) = @_;
336
337    open (my $maint, '<', "$file")
338	or die "$P: Can't open MAINTAINERS file '$file': $!\n";
339    my $i = 1;
340    while (<$maint>) {
341	my $line = $_;
342	chomp $line;
343
344	if ($line =~ m/^([A-Z]):\s*(.*)/) {
345	    my $type = $1;
346	    my $value = $2;
347
348	    ##Filename pattern matching
349	    if ($type eq "F" || $type eq "X") {
350		$value =~ s@\.@\\\.@g;       ##Convert . to \.
351		$value =~ s/\*/\.\*/g;       ##Convert * to .*
352		$value =~ s/\?/\./g;         ##Convert ? to .
353		##if pattern is a directory and it lacks a trailing slash, add one
354		if ((-d $value)) {
355		    $value =~ s@([^/])$@$1/@;
356		}
357	    } elsif ($type eq "K") {
358		$keyword_hash{@typevalue} = $value;
359	    }
360	    push(@typevalue, "$type:$value");
361	} elsif (!(/^\s*$/ || /^\s*\#/)) {
362	    push(@typevalue, $line);
363	}
364	if (defined $self_test) {
365	    push(@self_test_info, {file=>$file, linenr=>$i, line=>$line});
366	}
367	$i++;
368    }
369    close($maint);
370}
371
372sub find_is_maintainer_file {
373    my ($file) = $_;
374    return if ($file !~ m@/MAINTAINERS$@);
375    $file = $File::Find::name;
376    return if (! -f $file);
377    push(@mfiles, $file);
378}
379
380sub find_ignore_git {
381    return grep { $_ !~ /^\.git$/; } @_;
382}
383
384read_all_maintainer_files();
385
386sub read_all_maintainer_files {
387    if (-d "${lk_path}MAINTAINERS") {
388        opendir(DIR, "${lk_path}MAINTAINERS") or die $!;
389        my @files = readdir(DIR);
390        closedir(DIR);
391        foreach my $file (@files) {
392            push(@mfiles, "${lk_path}MAINTAINERS/$file") if ($file !~ /^\./);
393        }
394    }
395
396    if ($find_maintainer_files) {
397        find( { wanted => \&find_is_maintainer_file,
398                preprocess => \&find_ignore_git,
399                no_chdir => 1,
400        }, "${lk_path}");
401    } else {
402        push(@mfiles, "${lk_path}MAINTAINERS") if -f "${lk_path}MAINTAINERS";
403    }
404
405    foreach my $file (@mfiles) {
406        read_maintainer_file("$file");
407    }
408}
409
410#
411# Read mail address map
412#
413
414my $mailmap;
415
416read_mailmap();
417
418sub read_mailmap {
419    $mailmap = {
420	names => {},
421	addresses => {}
422    };
423
424    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
425
426    open(my $mailmap_file, '<', "${lk_path}.mailmap")
427	or warn "$P: Can't open .mailmap: $!\n";
428
429    while (<$mailmap_file>) {
430	s/#.*$//; #strip comments
431	s/^\s+|\s+$//g; #trim
432
433	next if (/^\s*$/); #skip empty lines
434	#entries have one of the following formats:
435	# name1 <mail1>
436	# <mail1> <mail2>
437	# name1 <mail1> <mail2>
438	# name1 <mail1> name2 <mail2>
439	# (see man git-shortlog)
440
441	if (/^([^<]+)<([^>]+)>$/) {
442	    my $real_name = $1;
443	    my $address = $2;
444
445	    $real_name =~ s/\s+$//;
446	    ($real_name, $address) = parse_email("$real_name <$address>");
447	    $mailmap->{names}->{$address} = $real_name;
448
449	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
450	    my $real_address = $1;
451	    my $wrong_address = $2;
452
453	    $mailmap->{addresses}->{$wrong_address} = $real_address;
454
455	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
456	    my $real_name = $1;
457	    my $real_address = $2;
458	    my $wrong_address = $3;
459
460	    $real_name =~ s/\s+$//;
461	    ($real_name, $real_address) =
462		parse_email("$real_name <$real_address>");
463	    $mailmap->{names}->{$wrong_address} = $real_name;
464	    $mailmap->{addresses}->{$wrong_address} = $real_address;
465
466	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
467	    my $real_name = $1;
468	    my $real_address = $2;
469	    my $wrong_name = $3;
470	    my $wrong_address = $4;
471
472	    $real_name =~ s/\s+$//;
473	    ($real_name, $real_address) =
474		parse_email("$real_name <$real_address>");
475
476	    $wrong_name =~ s/\s+$//;
477	    ($wrong_name, $wrong_address) =
478		parse_email("$wrong_name <$wrong_address>");
479
480	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
481	    $mailmap->{names}->{$wrong_email} = $real_name;
482	    $mailmap->{addresses}->{$wrong_email} = $real_address;
483	}
484    }
485    close($mailmap_file);
486}
487
488## use the filenames on the command line or find the filenames in the patchfiles
489
490my @files = ();
491my @range = ();
492my @keyword_tvi = ();
493my @file_emails = ();
494
495if (!@ARGV) {
496    push(@ARGV, "&STDIN");
497}
498
499foreach my $file (@ARGV) {
500    if ($file ne "&STDIN") {
501	##if $file is a directory and it lacks a trailing slash, add one
502	if ((-d $file)) {
503	    $file =~ s@([^/])$@$1/@;
504	} elsif (!(-f $file)) {
505	    die "$P: file '${file}' not found\n";
506	}
507    }
508    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
509	$file =~ s/^\Q${cur_path}\E//;	#strip any absolute path
510	$file =~ s/^\Q${lk_path}\E//;	#or the path to the lk tree
511	push(@files, $file);
512	if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
513	    open(my $f, '<', $file)
514		or die "$P: Can't open $file: $!\n";
515	    my $text = do { local($/) ; <$f> };
516	    close($f);
517	    if ($keywords) {
518		foreach my $line (keys %keyword_hash) {
519		    if ($text =~ m/$keyword_hash{$line}/x) {
520			push(@keyword_tvi, $line);
521		    }
522		}
523	    }
524	    if ($file_emails) {
525		my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
526		push(@file_emails, clean_file_emails(@poss_addr));
527	    }
528	}
529    } else {
530	my $file_cnt = @files;
531	my $lastfile;
532
533	open(my $patch, "< $file")
534	    or die "$P: Can't open $file: $!\n";
535
536	# We can check arbitrary information before the patch
537	# like the commit message, mail headers, etc...
538	# This allows us to match arbitrary keywords against any part
539	# of a git format-patch generated file (subject tags, etc...)
540
541	my $patch_prefix = "";			#Parsing the intro
542
543	while (<$patch>) {
544	    my $patch_line = $_;
545	    if (m/^ mode change [0-7]+ => [0-7]+ (\S+)\s*$/) {
546		my $filename = $1;
547		push(@files, $filename);
548	    } elsif (m/^rename (?:from|to) (\S+)\s*$/) {
549		my $filename = $1;
550		push(@files, $filename);
551	    } elsif (m/^diff --git a\/(\S+) b\/(\S+)\s*$/) {
552		my $filename1 = $1;
553		my $filename2 = $2;
554		push(@files, $filename1);
555		push(@files, $filename2);
556	    } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
557		my $filename = $1;
558		$filename =~ s@^[^/]*/@@;
559		$filename =~ s@\n@@;
560		$lastfile = $filename;
561		push(@files, $filename);
562		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
563	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
564		if ($email_git_blame) {
565		    push(@range, "$lastfile:$1:$2");
566		}
567	    } elsif ($keywords) {
568		foreach my $line (keys %keyword_hash) {
569		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
570			push(@keyword_tvi, $line);
571		    }
572		}
573	    }
574	}
575	close($patch);
576
577	if ($file_cnt == @files) {
578	    warn "$P: file '${file}' doesn't appear to be a patch.  "
579		. "Add -f to options?\n";
580	}
581	@files = sort_and_uniq(@files);
582    }
583}
584
585@file_emails = uniq(@file_emails);
586
587my %email_hash_name;
588my %email_hash_address;
589my @email_to = ();
590my %hash_list_to;
591my @list_to = ();
592my @scm = ();
593my @web = ();
594my @subsystem = ();
595my @status = ();
596my %deduplicate_name_hash = ();
597my %deduplicate_address_hash = ();
598
599my @maintainers = get_maintainers();
600
601if (@maintainers) {
602    @maintainers = merge_email(@maintainers);
603    output(@maintainers);
604}
605
606if ($scm) {
607    @scm = uniq(@scm);
608    output(@scm);
609}
610
611if ($status) {
612    @status = uniq(@status);
613    output(@status);
614}
615
616if ($subsystem) {
617    @subsystem = uniq(@subsystem);
618    output(@subsystem);
619}
620
621if ($web) {
622    @web = uniq(@web);
623    output(@web);
624}
625
626exit($exit);
627
628sub self_test {
629    my @lsfiles = ();
630    my @good_links = ();
631    my @bad_links = ();
632    my @section_headers = ();
633    my $index = 0;
634
635    @lsfiles = vcs_list_files($lk_path);
636
637    for my $x (@self_test_info) {
638	$index++;
639
640	## Section header duplication and missing section content
641	if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
642	    $x->{line} =~ /^\S[^:]/ &&
643	    defined $self_test_info[$index] &&
644	    $self_test_info[$index]->{line} =~ /^([A-Z]):\s*\S/) {
645	    my $has_S = 0;
646	    my $has_F = 0;
647	    my $has_ML = 0;
648	    my $status = "";
649	    if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
650		print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n");
651	    } else {
652		push(@section_headers, $x->{line});
653	    }
654	    my $nextline = $index;
655	    while (defined $self_test_info[$nextline] &&
656		   $self_test_info[$nextline]->{line} =~ /^([A-Z]):\s*(\S.*)/) {
657		my $type = $1;
658		my $value = $2;
659		if ($type eq "S") {
660		    $has_S = 1;
661		    $status = $value;
662		} elsif ($type eq "F" || $type eq "N") {
663		    $has_F = 1;
664		} elsif ($type eq "M" || $type eq "R" || $type eq "L") {
665		    $has_ML = 1;
666		}
667		$nextline++;
668	    }
669	    if (!$has_ML && $status !~ /orphan|obsolete/i) {
670		print("$x->{file}:$x->{linenr}: warning: section without email address\t$x->{line}\n");
671	    }
672	    if (!$has_S) {
673		print("$x->{file}:$x->{linenr}: warning: section without status \t$x->{line}\n");
674	    }
675	    if (!$has_F) {
676		print("$x->{file}:$x->{linenr}: warning: section without file pattern\t$x->{line}\n");
677	    }
678	}
679
680	next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);
681
682	my $type = $1;
683	my $value = $2;
684
685	## Filename pattern matching
686	if (($type eq "F" || $type eq "X") &&
687	    ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
688	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
689	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
690	    $value =~ s/\?/\./g;         ##Convert ? to .
691	    ##if pattern is a directory and it lacks a trailing slash, add one
692	    if ((-d $value)) {
693		$value =~ s@([^/])$@$1/@;
694	    }
695	    if (!grep(m@^$value@, @lsfiles)) {
696		print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n");
697	    }
698
699	## Link reachability
700	} elsif (($type eq "W" || $type eq "Q" || $type eq "B") &&
701		 $value =~ /^https?:/ &&
702		 ($self_test eq "" || $self_test =~ /\blinks\b/)) {
703	    next if (grep(m@^\Q$value\E$@, @good_links));
704	    my $isbad = 0;
705	    if (grep(m@^\Q$value\E$@, @bad_links)) {
706	        $isbad = 1;
707	    } else {
708		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`;
709		if ($? == 0) {
710		    push(@good_links, $value);
711		} else {
712		    push(@bad_links, $value);
713		    $isbad = 1;
714		}
715	    }
716	    if ($isbad) {
717	        print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
718	    }
719
720	## SCM reachability
721	} elsif ($type eq "T" &&
722		 ($self_test eq "" || $self_test =~ /\bscm\b/)) {
723	    next if (grep(m@^\Q$value\E$@, @good_links));
724	    my $isbad = 0;
725	    if (grep(m@^\Q$value\E$@, @bad_links)) {
726	        $isbad = 1;
727            } elsif ($value !~ /^(?:git|quilt|hg)\s+\S/) {
728		print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n");
729	    } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {
730		my $url = $1;
731		my $branch = "";
732		$branch = $3 if $3;
733		my $output = `git ls-remote --exit-code -h "$url" $branch > /dev/null 2>&1`;
734		if ($? == 0) {
735		    push(@good_links, $value);
736		} else {
737		    push(@bad_links, $value);
738		    $isbad = 1;
739		}
740	    } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
741		my $url = $1;
742		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
743		if ($? == 0) {
744		    push(@good_links, $value);
745		} else {
746		    push(@bad_links, $value);
747		    $isbad = 1;
748		}
749	    }
750	    if ($isbad) {
751		print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
752	    }
753	}
754    }
755}
756
757sub ignore_email_address {
758    my ($address) = @_;
759
760    foreach my $ignore (@ignore_emails) {
761	return 1 if ($ignore eq $address);
762    }
763
764    return 0;
765}
766
767sub range_is_maintained {
768    my ($start, $end) = @_;
769
770    for (my $i = $start; $i < $end; $i++) {
771	my $line = $typevalue[$i];
772	if ($line =~ m/^([A-Z]):\s*(.*)/) {
773	    my $type = $1;
774	    my $value = $2;
775	    if ($type eq 'S') {
776		if ($value =~ /(maintain|support)/i) {
777		    return 1;
778		}
779	    }
780	}
781    }
782    return 0;
783}
784
785sub range_has_maintainer {
786    my ($start, $end) = @_;
787
788    for (my $i = $start; $i < $end; $i++) {
789	my $line = $typevalue[$i];
790	if ($line =~ m/^([A-Z]):\s*(.*)/) {
791	    my $type = $1;
792	    my $value = $2;
793	    if ($type eq 'M') {
794		return 1;
795	    }
796	}
797    }
798    return 0;
799}
800
801sub get_maintainers {
802    %email_hash_name = ();
803    %email_hash_address = ();
804    %commit_author_hash = ();
805    %commit_signer_hash = ();
806    @email_to = ();
807    %hash_list_to = ();
808    @list_to = ();
809    @scm = ();
810    @web = ();
811    @subsystem = ();
812    @status = ();
813    %deduplicate_name_hash = ();
814    %deduplicate_address_hash = ();
815    if ($email_git_all_signature_types) {
816	$signature_pattern = "(.+?)[Bb][Yy]:";
817    } else {
818	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
819    }
820
821    # Find responsible parties
822
823    my %exact_pattern_match_hash = ();
824
825    foreach my $file (@files) {
826
827	my %hash;
828	my $tvi = find_first_section();
829	while ($tvi < @typevalue) {
830	    my $start = find_starting_index($tvi);
831	    my $end = find_ending_index($tvi);
832	    my $exclude = 0;
833	    my $i;
834
835	    #Do not match excluded file patterns
836
837	    for ($i = $start; $i < $end; $i++) {
838		my $line = $typevalue[$i];
839		if ($line =~ m/^([A-Z]):\s*(.*)/) {
840		    my $type = $1;
841		    my $value = $2;
842		    if ($type eq 'X') {
843			if (file_match_pattern($file, $value)) {
844			    $exclude = 1;
845			    last;
846			}
847		    }
848		}
849	    }
850
851	    if (!$exclude) {
852		for ($i = $start; $i < $end; $i++) {
853		    my $line = $typevalue[$i];
854		    if ($line =~ m/^([A-Z]):\s*(.*)/) {
855			my $type = $1;
856			my $value = $2;
857			if ($type eq 'F') {
858			    if (file_match_pattern($file, $value)) {
859				my $value_pd = ($value =~ tr@/@@);
860				my $file_pd = ($file  =~ tr@/@@);
861				$value_pd++ if (substr($value,-1,1) ne "/");
862				$value_pd = -1 if ($value =~ /^\.\*/);
863				if ($value_pd >= $file_pd &&
864				    range_is_maintained($start, $end) &&
865				    range_has_maintainer($start, $end)) {
866				    $exact_pattern_match_hash{$file} = 1;
867				}
868				if ($pattern_depth == 0 ||
869				    (($file_pd - $value_pd) < $pattern_depth)) {
870				    $hash{$tvi} = $value_pd;
871				}
872			    }
873			} elsif ($type eq 'N') {
874			    if ($file =~ m/$value/x) {
875				$hash{$tvi} = 0;
876			    }
877			}
878		    }
879		}
880	    }
881	    $tvi = $end + 1;
882	}
883
884	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
885	    add_categories($line);
886	    if ($sections) {
887		my $i;
888		my $start = find_starting_index($line);
889		my $end = find_ending_index($line);
890		for ($i = $start; $i < $end; $i++) {
891		    my $line = $typevalue[$i];
892		    if ($line =~ /^[FX]:/) {		##Restore file patterns
893			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
894			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
895			$line =~ s/\\\./\./g;       	##Convert \. to .
896			$line =~ s/\.\*/\*/g;       	##Convert .* to *
897		    }
898		    my $count = $line =~ s/^([A-Z]):/$1:\t/g;
899		    if ($letters eq "" || (!$count || $letters =~ /$1/i)) {
900			print("$line\n");
901		    }
902		}
903		print("\n");
904	    }
905	}
906    }
907
908    if ($keywords) {
909	@keyword_tvi = sort_and_uniq(@keyword_tvi);
910	foreach my $line (@keyword_tvi) {
911	    add_categories($line);
912	}
913    }
914
915    foreach my $email (@email_to, @list_to) {
916	$email->[0] = deduplicate_email($email->[0]);
917    }
918
919    foreach my $file (@files) {
920	if ($email &&
921	    ($email_git || ($email_git_fallback &&
922			    !$exact_pattern_match_hash{$file}))) {
923	    vcs_file_signoffs($file);
924	}
925	if ($email && $email_git_blame) {
926	    vcs_file_blame($file);
927	}
928    }
929
930    if ($email) {
931	foreach my $chief (@penguin_chief) {
932	    if ($chief =~ m/^(.*):(.*)/) {
933		my $email_address;
934
935		$email_address = format_email($1, $2, $email_usename);
936		if ($email_git_penguin_chiefs) {
937		    push(@email_to, [$email_address, 'chief penguin']);
938		} else {
939		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
940		}
941	    }
942	}
943
944	foreach my $email (@file_emails) {
945	    my ($name, $address) = parse_email($email);
946
947	    my $tmp_email = format_email($name, $address, $email_usename);
948	    push_email_address($tmp_email, '');
949	    add_role($tmp_email, 'in file');
950	}
951    }
952
953    my @to = ();
954    if ($email || $email_list) {
955	if ($email) {
956	    @to = (@to, @email_to);
957	}
958	if ($email_list) {
959	    @to = (@to, @list_to);
960	}
961    }
962
963    if ($interactive) {
964	@to = interactive_get_maintainers(\@to);
965    }
966
967    return @to;
968}
969
970sub file_match_pattern {
971    my ($file, $pattern) = @_;
972    if (substr($pattern, -1) eq "/") {
973	if ($file =~ m@^$pattern@) {
974	    return 1;
975	}
976    } else {
977	if ($file =~ m@^$pattern@) {
978	    my $s1 = ($file =~ tr@/@@);
979	    my $s2 = ($pattern =~ tr@/@@);
980	    if ($s1 == $s2) {
981		return 1;
982	    }
983	}
984    }
985    return 0;
986}
987
988sub usage {
989    print <<EOT;
990usage: $P [options] patchfile
991       $P [options] -f file|directory
992version: $V
993
994MAINTAINER field selection options:
995  --email => print email address(es) if any
996    --git => include recent git \*-by: signers
997    --git-all-signature-types => include signers regardless of signature type
998        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
999    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
1000    --git-chief-penguins => include ${penguin_chiefs}
1001    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
1002    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
1003    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
1004    --git-blame => use git blame to find modified commits for patch or file
1005    --git-blame-signatures => when used with --git-blame, also include all commit signers
1006    --git-since => git history to use (default: $email_git_since)
1007    --hg-since => hg history to use (default: $email_hg_since)
1008    --interactive => display a menu (mostly useful if used with the --git option)
1009    --m => include maintainer(s) if any
1010    --r => include reviewer(s) if any
1011    --n => include name 'Full Name <addr\@domain.tld>'
1012    --l => include list(s) if any
1013    --s => include subscriber only list(s) if any
1014    --remove-duplicates => minimize duplicate email names/addresses
1015    --roles => show roles (status:subsystem, git-signer, list, etc...)
1016    --rolestats => show roles and statistics (commits/total_commits, %)
1017    --file-emails => add email addresses found in -f file (default: 0 (off))
1018  --scm => print SCM tree(s) if any
1019  --status => print status if any
1020  --subsystem => print subsystem name if any
1021  --web => print website(s) if any
1022
1023Output type options:
1024  --separator [, ] => separator for multiple entries on 1 line
1025    using --separator also sets --nomultiline if --separator is not [, ]
1026  --multiline => print 1 entry per line
1027
1028Other options:
1029  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
1030  --keywords => scan patch for keywords (default: $keywords)
1031  --sections => print all of the subsystem sections with pattern matches
1032  --letters => print all matching 'letter' types from all matching sections
1033  --mailmap => use .mailmap file (default: $email_use_mailmap)
1034  --self-test => show potential issues with MAINTAINERS file content
1035  --version => show version
1036  --help => show this help information
1037
1038Default options:
1039  [--email --nogit --git-fallback --m --r --n --l --multiline --pattern-depth=0
1040   --remove-duplicates --rolestats]
1041
1042Notes:
1043  Using "-f directory" may give unexpected results:
1044      Used with "--git", git signators for _all_ files in and below
1045          directory are examined as git recurses directories.
1046          Any specified X: (exclude) pattern matches are _not_ ignored.
1047      Used with "--nogit", directory is used as a pattern match,
1048          no individual file within the directory or subdirectory
1049          is matched.
1050      Used with "--git-blame", does not iterate all files in directory
1051  Using "--git-blame" is slow and may add old committers and authors
1052      that are no longer active maintainers to the output.
1053  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
1054      other automated tools that expect only ["name"] <email address>
1055      may not work because of additional output after <email address>.
1056  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
1057      not the percentage of the entire file authored.  # of commits is
1058      not a good measure of amount of code authored.  1 major commit may
1059      contain a thousand lines, 5 trivial commits may modify a single line.
1060  If git is not installed, but mercurial (hg) is installed and an .hg
1061      repository exists, the following options apply to mercurial:
1062          --git,
1063          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
1064          --git-blame
1065      Use --hg-since not --git-since to control date selection
1066  File ".get_maintainer.conf", if it exists in the linux kernel source root
1067      directory, can change whatever get_maintainer defaults are desired.
1068      Entries in this file can be any command line argument.
1069      This file is prepended to any additional command line arguments.
1070      Multiple lines and # comments are allowed.
1071  Most options have both positive and negative forms.
1072      The negative forms for --<foo> are --no<foo> and --no-<foo>.
1073
1074EOT
1075}
1076
1077sub top_of_kernel_tree {
1078    my ($lk_path) = @_;
1079
1080    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
1081	$lk_path .= "/";
1082    }
1083    if (   (-f "${lk_path}COPYING")
1084	&& (-f "${lk_path}CREDITS")
1085	&& (-f "${lk_path}Kbuild")
1086	&& (-e "${lk_path}MAINTAINERS")
1087	&& (-f "${lk_path}Makefile")
1088	&& (-f "${lk_path}README")
1089	&& (-d "${lk_path}Documentation")
1090	&& (-d "${lk_path}arch")
1091	&& (-d "${lk_path}include")
1092	&& (-d "${lk_path}drivers")
1093	&& (-d "${lk_path}fs")
1094	&& (-d "${lk_path}init")
1095	&& (-d "${lk_path}ipc")
1096	&& (-d "${lk_path}kernel")
1097	&& (-d "${lk_path}lib")
1098	&& (-d "${lk_path}scripts")) {
1099	return 1;
1100    }
1101    return 0;
1102}
1103
1104sub parse_email {
1105    my ($formatted_email) = @_;
1106
1107    my $name = "";
1108    my $address = "";
1109
1110    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
1111	$name = $1;
1112	$address = $2;
1113    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
1114	$address = $1;
1115    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
1116	$address = $1;
1117    }
1118
1119    $name =~ s/^\s+|\s+$//g;
1120    $name =~ s/^\"|\"$//g;
1121    $address =~ s/^\s+|\s+$//g;
1122
1123    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
1124	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1125	$name = "\"$name\"";
1126    }
1127
1128    return ($name, $address);
1129}
1130
1131sub format_email {
1132    my ($name, $address, $usename) = @_;
1133
1134    my $formatted_email;
1135
1136    $name =~ s/^\s+|\s+$//g;
1137    $name =~ s/^\"|\"$//g;
1138    $address =~ s/^\s+|\s+$//g;
1139
1140    if ($name =~ /[^\w \-]/i) {          ##has "must quote" chars
1141	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1142	$name = "\"$name\"";
1143    }
1144
1145    if ($usename) {
1146	if ("$name" eq "") {
1147	    $formatted_email = "$address";
1148	} else {
1149	    $formatted_email = "$name <$address>";
1150	}
1151    } else {
1152	$formatted_email = $address;
1153    }
1154
1155    return $formatted_email;
1156}
1157
1158sub find_first_section {
1159    my $index = 0;
1160
1161    while ($index < @typevalue) {
1162	my $tv = $typevalue[$index];
1163	if (($tv =~ m/^([A-Z]):\s*(.*)/)) {
1164	    last;
1165	}
1166	$index++;
1167    }
1168
1169    return $index;
1170}
1171
1172sub find_starting_index {
1173    my ($index) = @_;
1174
1175    while ($index > 0) {
1176	my $tv = $typevalue[$index];
1177	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1178	    last;
1179	}
1180	$index--;
1181    }
1182
1183    return $index;
1184}
1185
1186sub find_ending_index {
1187    my ($index) = @_;
1188
1189    while ($index < @typevalue) {
1190	my $tv = $typevalue[$index];
1191	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1192	    last;
1193	}
1194	$index++;
1195    }
1196
1197    return $index;
1198}
1199
1200sub get_subsystem_name {
1201    my ($index) = @_;
1202
1203    my $start = find_starting_index($index);
1204
1205    my $subsystem = $typevalue[$start];
1206    if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
1207	$subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
1208	$subsystem =~ s/\s*$//;
1209	$subsystem = $subsystem . "...";
1210    }
1211    return $subsystem;
1212}
1213
1214sub get_maintainer_role {
1215    my ($index) = @_;
1216
1217    my $i;
1218    my $start = find_starting_index($index);
1219    my $end = find_ending_index($index);
1220
1221    my $role = "unknown";
1222    my $subsystem = get_subsystem_name($index);
1223
1224    for ($i = $start + 1; $i < $end; $i++) {
1225	my $tv = $typevalue[$i];
1226	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1227	    my $ptype = $1;
1228	    my $pvalue = $2;
1229	    if ($ptype eq "S") {
1230		$role = $pvalue;
1231	    }
1232	}
1233    }
1234
1235    $role = lc($role);
1236    if      ($role eq "supported") {
1237	$role = "supporter";
1238    } elsif ($role eq "maintained") {
1239	$role = "maintainer";
1240    } elsif ($role eq "odd fixes") {
1241	$role = "odd fixer";
1242    } elsif ($role eq "orphan") {
1243	$role = "orphan minder";
1244    } elsif ($role eq "obsolete") {
1245	$role = "obsolete minder";
1246    } elsif ($role eq "buried alive in reporters") {
1247	$role = "chief penguin";
1248    }
1249
1250    return $role . ":" . $subsystem;
1251}
1252
1253sub get_list_role {
1254    my ($index) = @_;
1255
1256    my $subsystem = get_subsystem_name($index);
1257
1258    if ($subsystem eq "THE REST") {
1259	$subsystem = "";
1260    }
1261
1262    return $subsystem;
1263}
1264
1265sub add_categories {
1266    my ($index) = @_;
1267
1268    my $i;
1269    my $start = find_starting_index($index);
1270    my $end = find_ending_index($index);
1271
1272    push(@subsystem, $typevalue[$start]);
1273
1274    for ($i = $start + 1; $i < $end; $i++) {
1275	my $tv = $typevalue[$i];
1276	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1277	    my $ptype = $1;
1278	    my $pvalue = $2;
1279	    if ($ptype eq "L") {
1280		my $list_address = $pvalue;
1281		my $list_additional = "";
1282		my $list_role = get_list_role($i);
1283
1284		if ($list_role ne "") {
1285		    $list_role = ":" . $list_role;
1286		}
1287		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1288		    $list_address = $1;
1289		    $list_additional = $2;
1290		}
1291		if ($list_additional =~ m/subscribers-only/) {
1292		    if ($email_subscriber_list) {
1293			if (!$hash_list_to{lc($list_address)}) {
1294			    $hash_list_to{lc($list_address)} = 1;
1295			    push(@list_to, [$list_address,
1296					    "subscriber list${list_role}"]);
1297			}
1298		    }
1299		} else {
1300		    if ($email_list) {
1301			if (!$hash_list_to{lc($list_address)}) {
1302			    $hash_list_to{lc($list_address)} = 1;
1303			    if ($list_additional =~ m/moderated/) {
1304				push(@list_to, [$list_address,
1305						"moderated list${list_role}"]);
1306			    } else {
1307				push(@list_to, [$list_address,
1308						"open list${list_role}"]);
1309			    }
1310			}
1311		    }
1312		}
1313	    } elsif ($ptype eq "M") {
1314		my ($name, $address) = parse_email($pvalue);
1315		if ($name eq "") {
1316		    if ($i > 0) {
1317			my $tv = $typevalue[$i - 1];
1318			if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1319			    if ($1 eq "P") {
1320				$name = $2;
1321				$pvalue = format_email($name, $address, $email_usename);
1322			    }
1323			}
1324		    }
1325		}
1326		if ($email_maintainer) {
1327		    my $role = get_maintainer_role($i);
1328		    push_email_addresses($pvalue, $role);
1329		}
1330	    } elsif ($ptype eq "R") {
1331		my ($name, $address) = parse_email($pvalue);
1332		if ($name eq "") {
1333		    if ($i > 0) {
1334			my $tv = $typevalue[$i - 1];
1335			if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1336			    if ($1 eq "P") {
1337				$name = $2;
1338				$pvalue = format_email($name, $address, $email_usename);
1339			    }
1340			}
1341		    }
1342		}
1343		if ($email_reviewer) {
1344		    my $subsystem = get_subsystem_name($i);
1345		    push_email_addresses($pvalue, "reviewer:$subsystem");
1346		}
1347	    } elsif ($ptype eq "T") {
1348		push(@scm, $pvalue);
1349	    } elsif ($ptype eq "W") {
1350		push(@web, $pvalue);
1351	    } elsif ($ptype eq "S") {
1352		push(@status, $pvalue);
1353	    }
1354	}
1355    }
1356}
1357
1358sub email_inuse {
1359    my ($name, $address) = @_;
1360
1361    return 1 if (($name eq "") && ($address eq ""));
1362    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1363    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1364
1365    return 0;
1366}
1367
1368sub push_email_address {
1369    my ($line, $role) = @_;
1370
1371    my ($name, $address) = parse_email($line);
1372
1373    if ($address eq "") {
1374	return 0;
1375    }
1376
1377    if (!$email_remove_duplicates) {
1378	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1379    } elsif (!email_inuse($name, $address)) {
1380	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1381	$email_hash_name{lc($name)}++ if ($name ne "");
1382	$email_hash_address{lc($address)}++;
1383    }
1384
1385    return 1;
1386}
1387
1388sub push_email_addresses {
1389    my ($address, $role) = @_;
1390
1391    my @address_list = ();
1392
1393    if (rfc822_valid($address)) {
1394	push_email_address($address, $role);
1395    } elsif (@address_list = rfc822_validlist($address)) {
1396	my $array_count = shift(@address_list);
1397	while (my $entry = shift(@address_list)) {
1398	    push_email_address($entry, $role);
1399	}
1400    } else {
1401	if (!push_email_address($address, $role)) {
1402	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1403	}
1404    }
1405}
1406
1407sub add_role {
1408    my ($line, $role) = @_;
1409
1410    my ($name, $address) = parse_email($line);
1411    my $email = format_email($name, $address, $email_usename);
1412
1413    foreach my $entry (@email_to) {
1414	if ($email_remove_duplicates) {
1415	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1416	    if (($name eq $entry_name || $address eq $entry_address)
1417		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1418	    ) {
1419		if ($entry->[1] eq "") {
1420		    $entry->[1] = "$role";
1421		} else {
1422		    $entry->[1] = "$entry->[1],$role";
1423		}
1424	    }
1425	} else {
1426	    if ($email eq $entry->[0]
1427		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1428	    ) {
1429		if ($entry->[1] eq "") {
1430		    $entry->[1] = "$role";
1431		} else {
1432		    $entry->[1] = "$entry->[1],$role";
1433		}
1434	    }
1435	}
1436    }
1437}
1438
1439sub which {
1440    my ($bin) = @_;
1441
1442    foreach my $path (split(/:/, $ENV{PATH})) {
1443	if (-e "$path/$bin") {
1444	    return "$path/$bin";
1445	}
1446    }
1447
1448    return "";
1449}
1450
1451sub which_conf {
1452    my ($conf) = @_;
1453
1454    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1455	if (-e "$path/$conf") {
1456	    return "$path/$conf";
1457	}
1458    }
1459
1460    return "";
1461}
1462
1463sub mailmap_email {
1464    my ($line) = @_;
1465
1466    my ($name, $address) = parse_email($line);
1467    my $email = format_email($name, $address, 1);
1468    my $real_name = $name;
1469    my $real_address = $address;
1470
1471    if (exists $mailmap->{names}->{$email} ||
1472	exists $mailmap->{addresses}->{$email}) {
1473	if (exists $mailmap->{names}->{$email}) {
1474	    $real_name = $mailmap->{names}->{$email};
1475	}
1476	if (exists $mailmap->{addresses}->{$email}) {
1477	    $real_address = $mailmap->{addresses}->{$email};
1478	}
1479    } else {
1480	if (exists $mailmap->{names}->{$address}) {
1481	    $real_name = $mailmap->{names}->{$address};
1482	}
1483	if (exists $mailmap->{addresses}->{$address}) {
1484	    $real_address = $mailmap->{addresses}->{$address};
1485	}
1486    }
1487    return format_email($real_name, $real_address, 1);
1488}
1489
1490sub mailmap {
1491    my (@addresses) = @_;
1492
1493    my @mapped_emails = ();
1494    foreach my $line (@addresses) {
1495	push(@mapped_emails, mailmap_email($line));
1496    }
1497    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1498    return @mapped_emails;
1499}
1500
1501sub merge_by_realname {
1502    my %address_map;
1503    my (@emails) = @_;
1504
1505    foreach my $email (@emails) {
1506	my ($name, $address) = parse_email($email);
1507	if (exists $address_map{$name}) {
1508	    $address = $address_map{$name};
1509	    $email = format_email($name, $address, 1);
1510	} else {
1511	    $address_map{$name} = $address;
1512	}
1513    }
1514}
1515
1516sub git_execute_cmd {
1517    my ($cmd) = @_;
1518    my @lines = ();
1519
1520    my $output = `$cmd`;
1521    $output =~ s/^\s*//gm;
1522    @lines = split("\n", $output);
1523
1524    return @lines;
1525}
1526
1527sub hg_execute_cmd {
1528    my ($cmd) = @_;
1529    my @lines = ();
1530
1531    my $output = `$cmd`;
1532    @lines = split("\n", $output);
1533
1534    return @lines;
1535}
1536
1537sub extract_formatted_signatures {
1538    my (@signature_lines) = @_;
1539
1540    my @type = @signature_lines;
1541
1542    s/\s*(.*):.*/$1/ for (@type);
1543
1544    # cut -f2- -d":"
1545    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1546
1547## Reformat email addresses (with names) to avoid badly written signatures
1548
1549    foreach my $signer (@signature_lines) {
1550	$signer = deduplicate_email($signer);
1551    }
1552
1553    return (\@type, \@signature_lines);
1554}
1555
1556sub vcs_find_signers {
1557    my ($cmd, $file) = @_;
1558    my $commits;
1559    my @lines = ();
1560    my @signatures = ();
1561    my @authors = ();
1562    my @stats = ();
1563
1564    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1565
1566    my $pattern = $VCS_cmds{"commit_pattern"};
1567    my $author_pattern = $VCS_cmds{"author_pattern"};
1568    my $stat_pattern = $VCS_cmds{"stat_pattern"};
1569
1570    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern
1571
1572    $commits = grep(/$pattern/, @lines);	# of commits
1573
1574    @authors = grep(/$author_pattern/, @lines);
1575    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1576    @stats = grep(/$stat_pattern/, @lines);
1577
1578#    print("stats: <@stats>\n");
1579
1580    return (0, \@signatures, \@authors, \@stats) if !@signatures;
1581
1582    save_commits_by_author(@lines) if ($interactive);
1583    save_commits_by_signer(@lines) if ($interactive);
1584
1585    if (!$email_git_penguin_chiefs) {
1586	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1587    }
1588
1589    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
1590    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1591
1592    return ($commits, $signers_ref, $authors_ref, \@stats);
1593}
1594
1595sub vcs_find_author {
1596    my ($cmd) = @_;
1597    my @lines = ();
1598
1599    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1600
1601    if (!$email_git_penguin_chiefs) {
1602	@lines = grep(!/${penguin_chiefs}/i, @lines);
1603    }
1604
1605    return @lines if !@lines;
1606
1607    my @authors = ();
1608    foreach my $line (@lines) {
1609	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1610	    my $author = $1;
1611	    my ($name, $address) = parse_email($author);
1612	    $author = format_email($name, $address, 1);
1613	    push(@authors, $author);
1614	}
1615    }
1616
1617    save_commits_by_author(@lines) if ($interactive);
1618    save_commits_by_signer(@lines) if ($interactive);
1619
1620    return @authors;
1621}
1622
1623sub vcs_save_commits {
1624    my ($cmd) = @_;
1625    my @lines = ();
1626    my @commits = ();
1627
1628    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1629
1630    foreach my $line (@lines) {
1631	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1632	    push(@commits, $1);
1633	}
1634    }
1635
1636    return @commits;
1637}
1638
1639sub vcs_blame {
1640    my ($file) = @_;
1641    my $cmd;
1642    my @commits = ();
1643
1644    return @commits if (!(-f $file));
1645
1646    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1647	my @all_commits = ();
1648
1649	$cmd = $VCS_cmds{"blame_file_cmd"};
1650	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1651	@all_commits = vcs_save_commits($cmd);
1652
1653	foreach my $file_range_diff (@range) {
1654	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1655	    my $diff_file = $1;
1656	    my $diff_start = $2;
1657	    my $diff_length = $3;
1658	    next if ("$file" ne "$diff_file");
1659	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1660		push(@commits, $all_commits[$i]);
1661	    }
1662	}
1663    } elsif (@range) {
1664	foreach my $file_range_diff (@range) {
1665	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1666	    my $diff_file = $1;
1667	    my $diff_start = $2;
1668	    my $diff_length = $3;
1669	    next if ("$file" ne "$diff_file");
1670	    $cmd = $VCS_cmds{"blame_range_cmd"};
1671	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1672	    push(@commits, vcs_save_commits($cmd));
1673	}
1674    } else {
1675	$cmd = $VCS_cmds{"blame_file_cmd"};
1676	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1677	@commits = vcs_save_commits($cmd);
1678    }
1679
1680    foreach my $commit (@commits) {
1681	$commit =~ s/^\^//g;
1682    }
1683
1684    return @commits;
1685}
1686
1687my $printed_novcs = 0;
1688sub vcs_exists {
1689    %VCS_cmds = %VCS_cmds_git;
1690    return 1 if eval $VCS_cmds{"available"};
1691    %VCS_cmds = %VCS_cmds_hg;
1692    return 2 if eval $VCS_cmds{"available"};
1693    %VCS_cmds = ();
1694    if (!$printed_novcs) {
1695	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1696	warn("Using a git repository produces better results.\n");
1697	warn("Try Linus Torvalds' latest git repository using:\n");
1698	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
1699	$printed_novcs = 1;
1700    }
1701    return 0;
1702}
1703
1704sub vcs_is_git {
1705    vcs_exists();
1706    return $vcs_used == 1;
1707}
1708
1709sub vcs_is_hg {
1710    return $vcs_used == 2;
1711}
1712
1713sub interactive_get_maintainers {
1714    my ($list_ref) = @_;
1715    my @list = @$list_ref;
1716
1717    vcs_exists();
1718
1719    my %selected;
1720    my %authored;
1721    my %signed;
1722    my $count = 0;
1723    my $maintained = 0;
1724    foreach my $entry (@list) {
1725	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1726	$selected{$count} = 1;
1727	$authored{$count} = 0;
1728	$signed{$count} = 0;
1729	$count++;
1730    }
1731
1732    #menu loop
1733    my $done = 0;
1734    my $print_options = 0;
1735    my $redraw = 1;
1736    while (!$done) {
1737	$count = 0;
1738	if ($redraw) {
1739	    printf STDERR "\n%1s %2s %-65s",
1740			  "*", "#", "email/list and role:stats";
1741	    if ($email_git ||
1742		($email_git_fallback && !$maintained) ||
1743		$email_git_blame) {
1744		print STDERR "auth sign";
1745	    }
1746	    print STDERR "\n";
1747	    foreach my $entry (@list) {
1748		my $email = $entry->[0];
1749		my $role = $entry->[1];
1750		my $sel = "";
1751		$sel = "*" if ($selected{$count});
1752		my $commit_author = $commit_author_hash{$email};
1753		my $commit_signer = $commit_signer_hash{$email};
1754		my $authored = 0;
1755		my $signed = 0;
1756		$authored++ for (@{$commit_author});
1757		$signed++ for (@{$commit_signer});
1758		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1759		printf STDERR "%4d %4d", $authored, $signed
1760		    if ($authored > 0 || $signed > 0);
1761		printf STDERR "\n     %s\n", $role;
1762		if ($authored{$count}) {
1763		    my $commit_author = $commit_author_hash{$email};
1764		    foreach my $ref (@{$commit_author}) {
1765			print STDERR "     Author: @{$ref}[1]\n";
1766		    }
1767		}
1768		if ($signed{$count}) {
1769		    my $commit_signer = $commit_signer_hash{$email};
1770		    foreach my $ref (@{$commit_signer}) {
1771			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1772		    }
1773		}
1774
1775		$count++;
1776	    }
1777	}
1778	my $date_ref = \$email_git_since;
1779	$date_ref = \$email_hg_since if (vcs_is_hg());
1780	if ($print_options) {
1781	    $print_options = 0;
1782	    if (vcs_exists()) {
1783		print STDERR <<EOT
1784
1785Version Control options:
1786g  use git history      [$email_git]
1787gf use git-fallback     [$email_git_fallback]
1788b  use git blame        [$email_git_blame]
1789bs use blame signatures [$email_git_blame_signatures]
1790c# minimum commits      [$email_git_min_signatures]
1791%# min percent          [$email_git_min_percent]
1792d# history to use       [$$date_ref]
1793x# max maintainers      [$email_git_max_maintainers]
1794t  all signature types  [$email_git_all_signature_types]
1795m  use .mailmap         [$email_use_mailmap]
1796EOT
1797	    }
1798	    print STDERR <<EOT
1799
1800Additional options:
18010  toggle all
1802tm toggle maintainers
1803tg toggle git entries
1804tl toggle open list entries
1805ts toggle subscriber list entries
1806f  emails in file       [$file_emails]
1807k  keywords in file     [$keywords]
1808r  remove duplicates    [$email_remove_duplicates]
1809p# pattern match depth  [$pattern_depth]
1810EOT
1811	}
1812	print STDERR
1813"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1814
1815	my $input = <STDIN>;
1816	chomp($input);
1817
1818	$redraw = 1;
1819	my $rerun = 0;
1820	my @wish = split(/[, ]+/, $input);
1821	foreach my $nr (@wish) {
1822	    $nr = lc($nr);
1823	    my $sel = substr($nr, 0, 1);
1824	    my $str = substr($nr, 1);
1825	    my $val = 0;
1826	    $val = $1 if $str =~ /^(\d+)$/;
1827
1828	    if ($sel eq "y") {
1829		$interactive = 0;
1830		$done = 1;
1831		$output_rolestats = 0;
1832		$output_roles = 0;
1833		last;
1834	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1835		$selected{$nr - 1} = !$selected{$nr - 1};
1836	    } elsif ($sel eq "*" || $sel eq '^') {
1837		my $toggle = 0;
1838		$toggle = 1 if ($sel eq '*');
1839		for (my $i = 0; $i < $count; $i++) {
1840		    $selected{$i} = $toggle;
1841		}
1842	    } elsif ($sel eq "0") {
1843		for (my $i = 0; $i < $count; $i++) {
1844		    $selected{$i} = !$selected{$i};
1845		}
1846	    } elsif ($sel eq "t") {
1847		if (lc($str) eq "m") {
1848		    for (my $i = 0; $i < $count; $i++) {
1849			$selected{$i} = !$selected{$i}
1850			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1851		    }
1852		} elsif (lc($str) eq "g") {
1853		    for (my $i = 0; $i < $count; $i++) {
1854			$selected{$i} = !$selected{$i}
1855			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1856		    }
1857		} elsif (lc($str) eq "l") {
1858		    for (my $i = 0; $i < $count; $i++) {
1859			$selected{$i} = !$selected{$i}
1860			    if ($list[$i]->[1] =~ /^(open list)/i);
1861		    }
1862		} elsif (lc($str) eq "s") {
1863		    for (my $i = 0; $i < $count; $i++) {
1864			$selected{$i} = !$selected{$i}
1865			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1866		    }
1867		}
1868	    } elsif ($sel eq "a") {
1869		if ($val > 0 && $val <= $count) {
1870		    $authored{$val - 1} = !$authored{$val - 1};
1871		} elsif ($str eq '*' || $str eq '^') {
1872		    my $toggle = 0;
1873		    $toggle = 1 if ($str eq '*');
1874		    for (my $i = 0; $i < $count; $i++) {
1875			$authored{$i} = $toggle;
1876		    }
1877		}
1878	    } elsif ($sel eq "s") {
1879		if ($val > 0 && $val <= $count) {
1880		    $signed{$val - 1} = !$signed{$val - 1};
1881		} elsif ($str eq '*' || $str eq '^') {
1882		    my $toggle = 0;
1883		    $toggle = 1 if ($str eq '*');
1884		    for (my $i = 0; $i < $count; $i++) {
1885			$signed{$i} = $toggle;
1886		    }
1887		}
1888	    } elsif ($sel eq "o") {
1889		$print_options = 1;
1890		$redraw = 1;
1891	    } elsif ($sel eq "g") {
1892		if ($str eq "f") {
1893		    bool_invert(\$email_git_fallback);
1894		} else {
1895		    bool_invert(\$email_git);
1896		}
1897		$rerun = 1;
1898	    } elsif ($sel eq "b") {
1899		if ($str eq "s") {
1900		    bool_invert(\$email_git_blame_signatures);
1901		} else {
1902		    bool_invert(\$email_git_blame);
1903		}
1904		$rerun = 1;
1905	    } elsif ($sel eq "c") {
1906		if ($val > 0) {
1907		    $email_git_min_signatures = $val;
1908		    $rerun = 1;
1909		}
1910	    } elsif ($sel eq "x") {
1911		if ($val > 0) {
1912		    $email_git_max_maintainers = $val;
1913		    $rerun = 1;
1914		}
1915	    } elsif ($sel eq "%") {
1916		if ($str ne "" && $val >= 0) {
1917		    $email_git_min_percent = $val;
1918		    $rerun = 1;
1919		}
1920	    } elsif ($sel eq "d") {
1921		if (vcs_is_git()) {
1922		    $email_git_since = $str;
1923		} elsif (vcs_is_hg()) {
1924		    $email_hg_since = $str;
1925		}
1926		$rerun = 1;
1927	    } elsif ($sel eq "t") {
1928		bool_invert(\$email_git_all_signature_types);
1929		$rerun = 1;
1930	    } elsif ($sel eq "f") {
1931		bool_invert(\$file_emails);
1932		$rerun = 1;
1933	    } elsif ($sel eq "r") {
1934		bool_invert(\$email_remove_duplicates);
1935		$rerun = 1;
1936	    } elsif ($sel eq "m") {
1937		bool_invert(\$email_use_mailmap);
1938		read_mailmap();
1939		$rerun = 1;
1940	    } elsif ($sel eq "k") {
1941		bool_invert(\$keywords);
1942		$rerun = 1;
1943	    } elsif ($sel eq "p") {
1944		if ($str ne "" && $val >= 0) {
1945		    $pattern_depth = $val;
1946		    $rerun = 1;
1947		}
1948	    } elsif ($sel eq "h" || $sel eq "?") {
1949		print STDERR <<EOT
1950
1951Interactive mode allows you to select the various maintainers, submitters,
1952commit signers and mailing lists that could be CC'd on a patch.
1953
1954Any *'d entry is selected.
1955
1956If you have git or hg installed, you can choose to summarize the commit
1957history of files in the patch.  Also, each line of the current file can
1958be matched to its commit author and that commits signers with blame.
1959
1960Various knobs exist to control the length of time for active commit
1961tracking, the maximum number of commit authors and signers to add,
1962and such.
1963
1964Enter selections at the prompt until you are satisfied that the selected
1965maintainers are appropriate.  You may enter multiple selections separated
1966by either commas or spaces.
1967
1968EOT
1969	    } else {
1970		print STDERR "invalid option: '$nr'\n";
1971		$redraw = 0;
1972	    }
1973	}
1974	if ($rerun) {
1975	    print STDERR "git-blame can be very slow, please have patience..."
1976		if ($email_git_blame);
1977	    goto &get_maintainers;
1978	}
1979    }
1980
1981    #drop not selected entries
1982    $count = 0;
1983    my @new_emailto = ();
1984    foreach my $entry (@list) {
1985	if ($selected{$count}) {
1986	    push(@new_emailto, $list[$count]);
1987	}
1988	$count++;
1989    }
1990    return @new_emailto;
1991}
1992
1993sub bool_invert {
1994    my ($bool_ref) = @_;
1995
1996    if ($$bool_ref) {
1997	$$bool_ref = 0;
1998    } else {
1999	$$bool_ref = 1;
2000    }
2001}
2002
2003sub deduplicate_email {
2004    my ($email) = @_;
2005
2006    my $matched = 0;
2007    my ($name, $address) = parse_email($email);
2008    $email = format_email($name, $address, 1);
2009    $email = mailmap_email($email);
2010
2011    return $email if (!$email_remove_duplicates);
2012
2013    ($name, $address) = parse_email($email);
2014
2015    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
2016	$name = $deduplicate_name_hash{lc($name)}->[0];
2017	$address = $deduplicate_name_hash{lc($name)}->[1];
2018	$matched = 1;
2019    } elsif ($deduplicate_address_hash{lc($address)}) {
2020	$name = $deduplicate_address_hash{lc($address)}->[0];
2021	$address = $deduplicate_address_hash{lc($address)}->[1];
2022	$matched = 1;
2023    }
2024    if (!$matched) {
2025	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
2026	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
2027    }
2028    $email = format_email($name, $address, 1);
2029    $email = mailmap_email($email);
2030    return $email;
2031}
2032
2033sub save_commits_by_author {
2034    my (@lines) = @_;
2035
2036    my @authors = ();
2037    my @commits = ();
2038    my @subjects = ();
2039
2040    foreach my $line (@lines) {
2041	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2042	    my $author = $1;
2043	    $author = deduplicate_email($author);
2044	    push(@authors, $author);
2045	}
2046	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2047	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2048    }
2049
2050    for (my $i = 0; $i < @authors; $i++) {
2051	my $exists = 0;
2052	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
2053	    if (@{$ref}[0] eq $commits[$i] &&
2054		@{$ref}[1] eq $subjects[$i]) {
2055		$exists = 1;
2056		last;
2057	    }
2058	}
2059	if (!$exists) {
2060	    push(@{$commit_author_hash{$authors[$i]}},
2061		 [ ($commits[$i], $subjects[$i]) ]);
2062	}
2063    }
2064}
2065
2066sub save_commits_by_signer {
2067    my (@lines) = @_;
2068
2069    my $commit = "";
2070    my $subject = "";
2071
2072    foreach my $line (@lines) {
2073	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2074	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2075	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
2076	    my @signatures = ($line);
2077	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
2078	    my @types = @$types_ref;
2079	    my @signers = @$signers_ref;
2080
2081	    my $type = $types[0];
2082	    my $signer = $signers[0];
2083
2084	    $signer = deduplicate_email($signer);
2085
2086	    my $exists = 0;
2087	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
2088		if (@{$ref}[0] eq $commit &&
2089		    @{$ref}[1] eq $subject &&
2090		    @{$ref}[2] eq $type) {
2091		    $exists = 1;
2092		    last;
2093		}
2094	    }
2095	    if (!$exists) {
2096		push(@{$commit_signer_hash{$signer}},
2097		     [ ($commit, $subject, $type) ]);
2098	    }
2099	}
2100    }
2101}
2102
2103sub vcs_assign {
2104    my ($role, $divisor, @lines) = @_;
2105
2106    my %hash;
2107    my $count = 0;
2108
2109    return if (@lines <= 0);
2110
2111    if ($divisor <= 0) {
2112	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
2113	$divisor = 1;
2114    }
2115
2116    @lines = mailmap(@lines);
2117
2118    return if (@lines <= 0);
2119
2120    @lines = sort(@lines);
2121
2122    # uniq -c
2123    $hash{$_}++ for @lines;
2124
2125    # sort -rn
2126    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
2127	my $sign_offs = $hash{$line};
2128	my $percent = $sign_offs * 100 / $divisor;
2129
2130	$percent = 100 if ($percent > 100);
2131	next if (ignore_email_address($line));
2132	$count++;
2133	last if ($sign_offs < $email_git_min_signatures ||
2134		 $count > $email_git_max_maintainers ||
2135		 $percent < $email_git_min_percent);
2136	push_email_address($line, '');
2137	if ($output_rolestats) {
2138	    my $fmt_percent = sprintf("%.0f", $percent);
2139	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
2140	} else {
2141	    add_role($line, $role);
2142	}
2143    }
2144}
2145
2146sub vcs_file_signoffs {
2147    my ($file) = @_;
2148
2149    my $authors_ref;
2150    my $signers_ref;
2151    my $stats_ref;
2152    my @authors = ();
2153    my @signers = ();
2154    my @stats = ();
2155    my $commits;
2156
2157    $vcs_used = vcs_exists();
2158    return if (!$vcs_used);
2159
2160    my $cmd = $VCS_cmds{"find_signers_cmd"};
2161    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2162
2163    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2164
2165    @signers = @{$signers_ref} if defined $signers_ref;
2166    @authors = @{$authors_ref} if defined $authors_ref;
2167    @stats = @{$stats_ref} if defined $stats_ref;
2168
2169#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
2170
2171    foreach my $signer (@signers) {
2172	$signer = deduplicate_email($signer);
2173    }
2174
2175    vcs_assign("commit_signer", $commits, @signers);
2176    vcs_assign("authored", $commits, @authors);
2177    if ($#authors == $#stats) {
2178	my $stat_pattern = $VCS_cmds{"stat_pattern"};
2179	$stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern
2180
2181	my $added = 0;
2182	my $deleted = 0;
2183	for (my $i = 0; $i <= $#stats; $i++) {
2184	    if ($stats[$i] =~ /$stat_pattern/) {
2185		$added += $1;
2186		$deleted += $2;
2187	    }
2188	}
2189	my @tmp_authors = uniq(@authors);
2190	foreach my $author (@tmp_authors) {
2191	    $author = deduplicate_email($author);
2192	}
2193	@tmp_authors = uniq(@tmp_authors);
2194	my @list_added = ();
2195	my @list_deleted = ();
2196	foreach my $author (@tmp_authors) {
2197	    my $auth_added = 0;
2198	    my $auth_deleted = 0;
2199	    for (my $i = 0; $i <= $#stats; $i++) {
2200		if ($author eq deduplicate_email($authors[$i]) &&
2201		    $stats[$i] =~ /$stat_pattern/) {
2202		    $auth_added += $1;
2203		    $auth_deleted += $2;
2204		}
2205	    }
2206	    for (my $i = 0; $i < $auth_added; $i++) {
2207		push(@list_added, $author);
2208	    }
2209	    for (my $i = 0; $i < $auth_deleted; $i++) {
2210		push(@list_deleted, $author);
2211	    }
2212	}
2213	vcs_assign("added_lines", $added, @list_added);
2214	vcs_assign("removed_lines", $deleted, @list_deleted);
2215    }
2216}
2217
2218sub vcs_file_blame {
2219    my ($file) = @_;
2220
2221    my @signers = ();
2222    my @all_commits = ();
2223    my @commits = ();
2224    my $total_commits;
2225    my $total_lines;
2226
2227    $vcs_used = vcs_exists();
2228    return if (!$vcs_used);
2229
2230    @all_commits = vcs_blame($file);
2231    @commits = uniq(@all_commits);
2232    $total_commits = @commits;
2233    $total_lines = @all_commits;
2234
2235    if ($email_git_blame_signatures) {
2236	if (vcs_is_hg()) {
2237	    my $commit_count;
2238	    my $commit_authors_ref;
2239	    my $commit_signers_ref;
2240	    my $stats_ref;
2241	    my @commit_authors = ();
2242	    my @commit_signers = ();
2243	    my $commit = join(" -r ", @commits);
2244	    my $cmd;
2245
2246	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
2247	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2248
2249	    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2250	    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2251	    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2252
2253	    push(@signers, @commit_signers);
2254	} else {
2255	    foreach my $commit (@commits) {
2256		my $commit_count;
2257		my $commit_authors_ref;
2258		my $commit_signers_ref;
2259		my $stats_ref;
2260		my @commit_authors = ();
2261		my @commit_signers = ();
2262		my $cmd;
2263
2264		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
2265		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2266
2267		($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2268		@commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2269		@commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2270
2271		push(@signers, @commit_signers);
2272	    }
2273	}
2274    }
2275
2276    if ($from_filename) {
2277	if ($output_rolestats) {
2278	    my @blame_signers;
2279	    if (vcs_is_hg()) {{		# Double brace for last exit
2280		my $commit_count;
2281		my @commit_signers = ();
2282		@commits = uniq(@commits);
2283		@commits = sort(@commits);
2284		my $commit = join(" -r ", @commits);
2285		my $cmd;
2286
2287		$cmd = $VCS_cmds{"find_commit_author_cmd"};
2288		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2289
2290		my @lines = ();
2291
2292		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
2293
2294		if (!$email_git_penguin_chiefs) {
2295		    @lines = grep(!/${penguin_chiefs}/i, @lines);
2296		}
2297
2298		last if !@lines;
2299
2300		my @authors = ();
2301		foreach my $line (@lines) {
2302		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2303			my $author = $1;
2304			$author = deduplicate_email($author);
2305			push(@authors, $author);
2306		    }
2307		}
2308
2309		save_commits_by_author(@lines) if ($interactive);
2310		save_commits_by_signer(@lines) if ($interactive);
2311
2312		push(@signers, @authors);
2313	    }}
2314	    else {
2315		foreach my $commit (@commits) {
2316		    my $i;
2317		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2318		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2319		    my @author = vcs_find_author($cmd);
2320		    next if !@author;
2321
2322		    my $formatted_author = deduplicate_email($author[0]);
2323
2324		    my $count = grep(/$commit/, @all_commits);
2325		    for ($i = 0; $i < $count ; $i++) {
2326			push(@blame_signers, $formatted_author);
2327		    }
2328		}
2329	    }
2330	    if (@blame_signers) {
2331		vcs_assign("authored lines", $total_lines, @blame_signers);
2332	    }
2333	}
2334	foreach my $signer (@signers) {
2335	    $signer = deduplicate_email($signer);
2336	}
2337	vcs_assign("commits", $total_commits, @signers);
2338    } else {
2339	foreach my $signer (@signers) {
2340	    $signer = deduplicate_email($signer);
2341	}
2342	vcs_assign("modified commits", $total_commits, @signers);
2343    }
2344}
2345
2346sub vcs_file_exists {
2347    my ($file) = @_;
2348
2349    my $exists;
2350
2351    my $vcs_used = vcs_exists();
2352    return 0 if (!$vcs_used);
2353
2354    my $cmd = $VCS_cmds{"file_exists_cmd"};
2355    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2356    $cmd .= " 2>&1";
2357    $exists = &{$VCS_cmds{"execute_cmd"}}($cmd);
2358
2359    return 0 if ($? != 0);
2360
2361    return $exists;
2362}
2363
2364sub vcs_list_files {
2365    my ($file) = @_;
2366
2367    my @lsfiles = ();
2368
2369    my $vcs_used = vcs_exists();
2370    return 0 if (!$vcs_used);
2371
2372    my $cmd = $VCS_cmds{"list_files_cmd"};
2373    $cmd =~ s/(\$\w+)/$1/eeg;   # interpolate $cmd
2374    @lsfiles = &{$VCS_cmds{"execute_cmd"}}($cmd);
2375
2376    return () if ($? != 0);
2377
2378    return @lsfiles;
2379}
2380
2381sub uniq {
2382    my (@parms) = @_;
2383
2384    my %saw;
2385    @parms = grep(!$saw{$_}++, @parms);
2386    return @parms;
2387}
2388
2389sub sort_and_uniq {
2390    my (@parms) = @_;
2391
2392    my %saw;
2393    @parms = sort @parms;
2394    @parms = grep(!$saw{$_}++, @parms);
2395    return @parms;
2396}
2397
2398sub clean_file_emails {
2399    my (@file_emails) = @_;
2400    my @fmt_emails = ();
2401
2402    foreach my $email (@file_emails) {
2403	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2404	my ($name, $address) = parse_email($email);
2405	if ($name eq '"[,\.]"') {
2406	    $name = "";
2407	}
2408
2409	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
2410	if (@nw > 2) {
2411	    my $first = $nw[@nw - 3];
2412	    my $middle = $nw[@nw - 2];
2413	    my $last = $nw[@nw - 1];
2414
2415	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
2416		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2417		(length($middle) == 1 ||
2418		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2419		$name = "$first $middle $last";
2420	    } else {
2421		$name = "$middle $last";
2422	    }
2423	}
2424
2425	if (substr($name, -1) =~ /[,\.]/) {
2426	    $name = substr($name, 0, length($name) - 1);
2427	} elsif (substr($name, -2) =~ /[,\.]"/) {
2428	    $name = substr($name, 0, length($name) - 2) . '"';
2429	}
2430
2431	if (substr($name, 0, 1) =~ /[,\.]/) {
2432	    $name = substr($name, 1, length($name) - 1);
2433	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
2434	    $name = '"' . substr($name, 2, length($name) - 2);
2435	}
2436
2437	my $fmt_email = format_email($name, $address, $email_usename);
2438	push(@fmt_emails, $fmt_email);
2439    }
2440    return @fmt_emails;
2441}
2442
2443sub merge_email {
2444    my @lines;
2445    my %saw;
2446
2447    for (@_) {
2448	my ($address, $role) = @$_;
2449	if (!$saw{$address}) {
2450	    if ($output_roles) {
2451		push(@lines, "$address ($role)");
2452	    } else {
2453		push(@lines, $address);
2454	    }
2455	    $saw{$address} = 1;
2456	}
2457    }
2458
2459    return @lines;
2460}
2461
2462sub output {
2463    my (@parms) = @_;
2464
2465    if ($output_multiline) {
2466	foreach my $line (@parms) {
2467	    print("${line}\n");
2468	}
2469    } else {
2470	print(join($output_separator, @parms));
2471	print("\n");
2472    }
2473}
2474
2475my $rfc822re;
2476
2477sub make_rfc822re {
2478#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2479#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2480#   This regexp will only work on addresses which have had comments stripped
2481#   and replaced with rfc822_lwsp.
2482
2483    my $specials = '()<>@,;:\\\\".\\[\\]';
2484    my $controls = '\\000-\\037\\177';
2485
2486    my $dtext = "[^\\[\\]\\r\\\\]";
2487    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2488
2489    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2490
2491#   Use zero-width assertion to spot the limit of an atom.  A simple
2492#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2493    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2494    my $word = "(?:$atom|$quoted_string)";
2495    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2496
2497    my $sub_domain = "(?:$atom|$domain_literal)";
2498    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2499
2500    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2501
2502    my $phrase = "$word*";
2503    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2504    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2505    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2506
2507    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2508    my $address = "(?:$mailbox|$group)";
2509
2510    return "$rfc822_lwsp*$address";
2511}
2512
2513sub rfc822_strip_comments {
2514    my $s = shift;
2515#   Recursively remove comments, and replace with a single space.  The simpler
2516#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2517#   chars in atoms, for example.
2518
2519    while ($s =~ s/^((?:[^"\\]|\\.)*
2520                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2521                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2522    return $s;
2523}
2524
2525#   valid: returns true if the parameter is an RFC822 valid address
2526#
2527sub rfc822_valid {
2528    my $s = rfc822_strip_comments(shift);
2529
2530    if (!$rfc822re) {
2531        $rfc822re = make_rfc822re();
2532    }
2533
2534    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2535}
2536
2537#   validlist: In scalar context, returns true if the parameter is an RFC822
2538#              valid list of addresses.
2539#
2540#              In list context, returns an empty list on failure (an invalid
2541#              address was found); otherwise a list whose first element is the
2542#              number of addresses found and whose remaining elements are the
2543#              addresses.  This is needed to disambiguate failure (invalid)
2544#              from success with no addresses found, because an empty string is
2545#              a valid list.
2546
2547sub rfc822_validlist {
2548    my $s = rfc822_strip_comments(shift);
2549
2550    if (!$rfc822re) {
2551        $rfc822re = make_rfc822re();
2552    }
2553    # * null list items are valid according to the RFC
2554    # * the '1' business is to aid in distinguishing failure from no results
2555
2556    my @r;
2557    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2558	$s =~ m/^$rfc822_char*$/) {
2559        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2560            push(@r, $1);
2561        }
2562        return wantarray ? (scalar(@r), @r) : 1;
2563    }
2564    return wantarray ? () : 0;
2565}
2566