xref: /openbmc/linux/scripts/get_maintainer.pl (revision 2e35facf82bcdd9b9eb9129f4fb31127b79249ec)
1#!/usr/bin/env perl
2# SPDX-License-Identifier: GPL-2.0
3#
4# (c) 2007, Joe Perches <joe@perches.com>
5#           created from checkpatch.pl
6#
7# Print selected MAINTAINERS information for
8# the files modified in a patch or for a file
9#
10# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
11#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
12
13use warnings;
14use strict;
15
16my $P = $0;
17my $V = '0.26';
18
19use Getopt::Long qw(:config no_auto_abbrev);
20use Cwd;
21use File::Find;
22
23my $cur_path = fastgetcwd() . '/';
24my $lk_path = "./";
25my $email = 1;
26my $email_usename = 1;
27my $email_maintainer = 1;
28my $email_reviewer = 1;
29my $email_list = 1;
30my $email_subscriber_list = 0;
31my $email_git_penguin_chiefs = 0;
32my $email_git = 0;
33my $email_git_all_signature_types = 0;
34my $email_git_blame = 0;
35my $email_git_blame_signatures = 1;
36my $email_git_fallback = 1;
37my $email_git_min_signatures = 1;
38my $email_git_max_maintainers = 5;
39my $email_git_min_percent = 5;
40my $email_git_since = "1-year-ago";
41my $email_hg_since = "-365";
42my $interactive = 0;
43my $email_remove_duplicates = 1;
44my $email_use_mailmap = 1;
45my $output_multiline = 1;
46my $output_separator = ", ";
47my $output_roles = 0;
48my $output_rolestats = 1;
49my $output_section_maxlen = 50;
50my $scm = 0;
51my $tree = 1;
52my $web = 0;
53my $subsystem = 0;
54my $status = 0;
55my $letters = "";
56my $keywords = 1;
57my $sections = 0;
58my $file_emails = 0;
59my $from_filename = 0;
60my $pattern_depth = 0;
61my $self_test = undef;
62my $version = 0;
63my $help = 0;
64my $find_maintainer_files = 0;
65my $maintainer_path;
66my $vcs_used = 0;
67
68my $exit = 0;
69
70my %commit_author_hash;
71my %commit_signer_hash;
72
73my @penguin_chief = ();
74push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
75#Andrew wants in on most everything - 2009/01/14
76#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
77
78my @penguin_chief_names = ();
79foreach my $chief (@penguin_chief) {
80    if ($chief =~ m/^(.*):(.*)/) {
81	my $chief_name = $1;
82	my $chief_addr = $2;
83	push(@penguin_chief_names, $chief_name);
84    }
85}
86my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
87
88# Signature types of people who are either
89# 	a) responsible for the code in question, or
90# 	b) familiar enough with it to give relevant feedback
91my @signature_tags = ();
92push(@signature_tags, "Signed-off-by:");
93push(@signature_tags, "Reviewed-by:");
94push(@signature_tags, "Acked-by:");
95
96my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
97
98# rfc822 email address - preloaded methods go here.
99my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
100my $rfc822_char = '[\\000-\\377]';
101
102# VCS command support: class-like functions and strings
103
104my %VCS_cmds;
105
106my %VCS_cmds_git = (
107    "execute_cmd" => \&git_execute_cmd,
108    "available" => '(which("git") ne "") && (-e ".git")',
109    "find_signers_cmd" =>
110	"git log --no-color --follow --since=\$email_git_since " .
111	    '--numstat --no-merges ' .
112	    '--format="GitCommit: %H%n' .
113		      'GitAuthor: %an <%ae>%n' .
114		      'GitDate: %aD%n' .
115		      'GitSubject: %s%n' .
116		      '%b%n"' .
117	    " -- \$file",
118    "find_commit_signers_cmd" =>
119	"git log --no-color " .
120	    '--numstat ' .
121	    '--format="GitCommit: %H%n' .
122		      'GitAuthor: %an <%ae>%n' .
123		      'GitDate: %aD%n' .
124		      'GitSubject: %s%n' .
125		      '%b%n"' .
126	    " -1 \$commit",
127    "find_commit_author_cmd" =>
128	"git log --no-color " .
129	    '--numstat ' .
130	    '--format="GitCommit: %H%n' .
131		      'GitAuthor: %an <%ae>%n' .
132		      'GitDate: %aD%n' .
133		      'GitSubject: %s%n"' .
134	    " -1 \$commit",
135    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
136    "blame_file_cmd" => "git blame -l \$file",
137    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
138    "blame_commit_pattern" => "^([0-9a-f]+) ",
139    "author_pattern" => "^GitAuthor: (.*)",
140    "subject_pattern" => "^GitSubject: (.*)",
141    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
142    "file_exists_cmd" => "git ls-files \$file",
143    "list_files_cmd" => "git ls-files \$file",
144);
145
146my %VCS_cmds_hg = (
147    "execute_cmd" => \&hg_execute_cmd,
148    "available" => '(which("hg") ne "") && (-d ".hg")',
149    "find_signers_cmd" =>
150	"hg log --date=\$email_hg_since " .
151	    "--template='HgCommit: {node}\\n" .
152	                "HgAuthor: {author}\\n" .
153			"HgSubject: {desc}\\n'" .
154	    " -- \$file",
155    "find_commit_signers_cmd" =>
156	"hg log " .
157	    "--template='HgSubject: {desc}\\n'" .
158	    " -r \$commit",
159    "find_commit_author_cmd" =>
160	"hg log " .
161	    "--template='HgCommit: {node}\\n" .
162		        "HgAuthor: {author}\\n" .
163			"HgSubject: {desc|firstline}\\n'" .
164	    " -r \$commit",
165    "blame_range_cmd" => "",		# not supported
166    "blame_file_cmd" => "hg blame -n \$file",
167    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
168    "blame_commit_pattern" => "^([ 0-9a-f]+):",
169    "author_pattern" => "^HgAuthor: (.*)",
170    "subject_pattern" => "^HgSubject: (.*)",
171    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
172    "file_exists_cmd" => "hg files \$file",
173    "list_files_cmd" => "hg manifest -R \$file",
174);
175
176my $conf = which_conf(".get_maintainer.conf");
177if (-f $conf) {
178    my @conf_args;
179    open(my $conffile, '<', "$conf")
180	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
181
182    while (<$conffile>) {
183	my $line = $_;
184
185	$line =~ s/\s*\n?$//g;
186	$line =~ s/^\s*//g;
187	$line =~ s/\s+/ /g;
188
189	next if ($line =~ m/^\s*#/);
190	next if ($line =~ m/^\s*$/);
191
192	my @words = split(" ", $line);
193	foreach my $word (@words) {
194	    last if ($word =~ m/^#/);
195	    push (@conf_args, $word);
196	}
197    }
198    close($conffile);
199    unshift(@ARGV, @conf_args) if @conf_args;
200}
201
202my @ignore_emails = ();
203my $ignore_file = which_conf(".get_maintainer.ignore");
204if (-f $ignore_file) {
205    open(my $ignore, '<', "$ignore_file")
206	or warn "$P: Can't find a readable .get_maintainer.ignore file $!\n";
207    while (<$ignore>) {
208	my $line = $_;
209
210	$line =~ s/\s*\n?$//;
211	$line =~ s/^\s*//;
212	$line =~ s/\s+$//;
213	$line =~ s/#.*$//;
214
215	next if ($line =~ m/^\s*$/);
216	if (rfc822_valid($line)) {
217	    push(@ignore_emails, $line);
218	}
219    }
220    close($ignore);
221}
222
223if ($#ARGV > 0) {
224    foreach (@ARGV) {
225        if ($_ =~ /^-{1,2}self-test(?:=|$)/) {
226            die "$P: using --self-test does not allow any other option or argument\n";
227        }
228    }
229}
230
231if (!GetOptions(
232		'email!' => \$email,
233		'git!' => \$email_git,
234		'git-all-signature-types!' => \$email_git_all_signature_types,
235		'git-blame!' => \$email_git_blame,
236		'git-blame-signatures!' => \$email_git_blame_signatures,
237		'git-fallback!' => \$email_git_fallback,
238		'git-chief-penguins!' => \$email_git_penguin_chiefs,
239		'git-min-signatures=i' => \$email_git_min_signatures,
240		'git-max-maintainers=i' => \$email_git_max_maintainers,
241		'git-min-percent=i' => \$email_git_min_percent,
242		'git-since=s' => \$email_git_since,
243		'hg-since=s' => \$email_hg_since,
244		'i|interactive!' => \$interactive,
245		'remove-duplicates!' => \$email_remove_duplicates,
246		'mailmap!' => \$email_use_mailmap,
247		'm!' => \$email_maintainer,
248		'r!' => \$email_reviewer,
249		'n!' => \$email_usename,
250		'l!' => \$email_list,
251		's!' => \$email_subscriber_list,
252		'multiline!' => \$output_multiline,
253		'roles!' => \$output_roles,
254		'rolestats!' => \$output_rolestats,
255		'separator=s' => \$output_separator,
256		'subsystem!' => \$subsystem,
257		'status!' => \$status,
258		'scm!' => \$scm,
259		'tree!' => \$tree,
260		'web!' => \$web,
261		'letters=s' => \$letters,
262		'pattern-depth=i' => \$pattern_depth,
263		'k|keywords!' => \$keywords,
264		'sections!' => \$sections,
265		'fe|file-emails!' => \$file_emails,
266		'f|file' => \$from_filename,
267		'find-maintainer-files' => \$find_maintainer_files,
268		'mpath|maintainer-path=s' => \$maintainer_path,
269		'self-test:s' => \$self_test,
270		'v|version' => \$version,
271		'h|help|usage' => \$help,
272		)) {
273    die "$P: invalid argument - use --help if necessary\n";
274}
275
276if ($help != 0) {
277    usage();
278    exit 0;
279}
280
281if ($version != 0) {
282    print("${P} ${V}\n");
283    exit 0;
284}
285
286if (defined $self_test) {
287    read_all_maintainer_files();
288    self_test();
289    exit 0;
290}
291
292if (-t STDIN && !@ARGV) {
293    # We're talking to a terminal, but have no command line arguments.
294    die "$P: missing patchfile or -f file - use --help if necessary\n";
295}
296
297$output_multiline = 0 if ($output_separator ne ", ");
298$output_rolestats = 1 if ($interactive);
299$output_roles = 1 if ($output_rolestats);
300
301if ($sections || $letters ne "") {
302    $sections = 1;
303    $email = 0;
304    $email_list = 0;
305    $scm = 0;
306    $status = 0;
307    $subsystem = 0;
308    $web = 0;
309    $keywords = 0;
310    $interactive = 0;
311} else {
312    my $selections = $email + $scm + $status + $subsystem + $web;
313    if ($selections == 0) {
314	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
315    }
316}
317
318if ($email &&
319    ($email_maintainer + $email_reviewer +
320     $email_list + $email_subscriber_list +
321     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
322    die "$P: Please select at least 1 email option\n";
323}
324
325if ($tree && !top_of_kernel_tree($lk_path)) {
326    die "$P: The current directory does not appear to be "
327	. "a linux kernel source tree.\n";
328}
329
330## Read MAINTAINERS for type/value pairs
331
332my @typevalue = ();
333my %keyword_hash;
334my @mfiles = ();
335my @self_test_info = ();
336
337sub read_maintainer_file {
338    my ($file) = @_;
339
340    open (my $maint, '<', "$file")
341	or die "$P: Can't open MAINTAINERS file '$file': $!\n";
342    my $i = 1;
343    while (<$maint>) {
344	my $line = $_;
345	chomp $line;
346
347	if ($line =~ m/^([A-Z]):\s*(.*)/) {
348	    my $type = $1;
349	    my $value = $2;
350
351	    ##Filename pattern matching
352	    if ($type eq "F" || $type eq "X") {
353		$value =~ s@\.@\\\.@g;       ##Convert . to \.
354		$value =~ s/\*/\.\*/g;       ##Convert * to .*
355		$value =~ s/\?/\./g;         ##Convert ? to .
356		##if pattern is a directory and it lacks a trailing slash, add one
357		if ((-d $value)) {
358		    $value =~ s@([^/])$@$1/@;
359		}
360	    } elsif ($type eq "K") {
361		$keyword_hash{@typevalue} = $value;
362	    }
363	    push(@typevalue, "$type:$value");
364	} elsif (!(/^\s*$/ || /^\s*\#/)) {
365	    push(@typevalue, $line);
366	}
367	if (defined $self_test) {
368	    push(@self_test_info, {file=>$file, linenr=>$i, line=>$line});
369	}
370	$i++;
371    }
372    close($maint);
373}
374
375sub find_is_maintainer_file {
376    my ($file) = $_;
377    return if ($file !~ m@/MAINTAINERS$@);
378    $file = $File::Find::name;
379    return if (! -f $file);
380    push(@mfiles, $file);
381}
382
383sub find_ignore_git {
384    return grep { $_ !~ /^\.git$/; } @_;
385}
386
387read_all_maintainer_files();
388
389sub read_all_maintainer_files {
390    my $path = "${lk_path}MAINTAINERS";
391    if (defined $maintainer_path) {
392	$path = $maintainer_path;
393	# Perl Cookbook tilde expansion if necessary
394	$path =~ s@^~([^/]*)@ $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($<))[7])@ex;
395    }
396
397    if (-d $path) {
398	$path .= '/' if ($path !~ m@/$@);
399	if ($find_maintainer_files) {
400	    find( { wanted => \&find_is_maintainer_file,
401		    preprocess => \&find_ignore_git,
402		    no_chdir => 1,
403		}, "$path");
404	} else {
405	    opendir(DIR, "$path") or die $!;
406	    my @files = readdir(DIR);
407	    closedir(DIR);
408	    foreach my $file (@files) {
409		push(@mfiles, "$path$file") if ($file !~ /^\./);
410	    }
411	}
412    } elsif (-f "$path") {
413	push(@mfiles, "$path");
414    } else {
415	die "$P: MAINTAINER file not found '$path'\n";
416    }
417    die "$P: No MAINTAINER files found in '$path'\n" if (scalar(@mfiles) == 0);
418    foreach my $file (@mfiles) {
419	read_maintainer_file("$file");
420    }
421}
422
423#
424# Read mail address map
425#
426
427my $mailmap;
428
429read_mailmap();
430
431sub read_mailmap {
432    $mailmap = {
433	names => {},
434	addresses => {}
435    };
436
437    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
438
439    open(my $mailmap_file, '<', "${lk_path}.mailmap")
440	or warn "$P: Can't open .mailmap: $!\n";
441
442    while (<$mailmap_file>) {
443	s/#.*$//; #strip comments
444	s/^\s+|\s+$//g; #trim
445
446	next if (/^\s*$/); #skip empty lines
447	#entries have one of the following formats:
448	# name1 <mail1>
449	# <mail1> <mail2>
450	# name1 <mail1> <mail2>
451	# name1 <mail1> name2 <mail2>
452	# (see man git-shortlog)
453
454	if (/^([^<]+)<([^>]+)>$/) {
455	    my $real_name = $1;
456	    my $address = $2;
457
458	    $real_name =~ s/\s+$//;
459	    ($real_name, $address) = parse_email("$real_name <$address>");
460	    $mailmap->{names}->{$address} = $real_name;
461
462	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
463	    my $real_address = $1;
464	    my $wrong_address = $2;
465
466	    $mailmap->{addresses}->{$wrong_address} = $real_address;
467
468	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
469	    my $real_name = $1;
470	    my $real_address = $2;
471	    my $wrong_address = $3;
472
473	    $real_name =~ s/\s+$//;
474	    ($real_name, $real_address) =
475		parse_email("$real_name <$real_address>");
476	    $mailmap->{names}->{$wrong_address} = $real_name;
477	    $mailmap->{addresses}->{$wrong_address} = $real_address;
478
479	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
480	    my $real_name = $1;
481	    my $real_address = $2;
482	    my $wrong_name = $3;
483	    my $wrong_address = $4;
484
485	    $real_name =~ s/\s+$//;
486	    ($real_name, $real_address) =
487		parse_email("$real_name <$real_address>");
488
489	    $wrong_name =~ s/\s+$//;
490	    ($wrong_name, $wrong_address) =
491		parse_email("$wrong_name <$wrong_address>");
492
493	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
494	    $mailmap->{names}->{$wrong_email} = $real_name;
495	    $mailmap->{addresses}->{$wrong_email} = $real_address;
496	}
497    }
498    close($mailmap_file);
499}
500
501## use the filenames on the command line or find the filenames in the patchfiles
502
503my @files = ();
504my @range = ();
505my @keyword_tvi = ();
506my @file_emails = ();
507
508if (!@ARGV) {
509    push(@ARGV, "&STDIN");
510}
511
512foreach my $file (@ARGV) {
513    if ($file ne "&STDIN") {
514	##if $file is a directory and it lacks a trailing slash, add one
515	if ((-d $file)) {
516	    $file =~ s@([^/])$@$1/@;
517	} elsif (!(-f $file)) {
518	    die "$P: file '${file}' not found\n";
519	}
520    }
521    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
522	$file =~ s/^\Q${cur_path}\E//;	#strip any absolute path
523	$file =~ s/^\Q${lk_path}\E//;	#or the path to the lk tree
524	push(@files, $file);
525	if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
526	    open(my $f, '<', $file)
527		or die "$P: Can't open $file: $!\n";
528	    my $text = do { local($/) ; <$f> };
529	    close($f);
530	    if ($keywords) {
531		foreach my $line (keys %keyword_hash) {
532		    if ($text =~ m/$keyword_hash{$line}/x) {
533			push(@keyword_tvi, $line);
534		    }
535		}
536	    }
537	    if ($file_emails) {
538		my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
539		push(@file_emails, clean_file_emails(@poss_addr));
540	    }
541	}
542    } else {
543	my $file_cnt = @files;
544	my $lastfile;
545
546	open(my $patch, "< $file")
547	    or die "$P: Can't open $file: $!\n";
548
549	# We can check arbitrary information before the patch
550	# like the commit message, mail headers, etc...
551	# This allows us to match arbitrary keywords against any part
552	# of a git format-patch generated file (subject tags, etc...)
553
554	my $patch_prefix = "";			#Parsing the intro
555
556	while (<$patch>) {
557	    my $patch_line = $_;
558	    if (m/^ mode change [0-7]+ => [0-7]+ (\S+)\s*$/) {
559		my $filename = $1;
560		push(@files, $filename);
561	    } elsif (m/^rename (?:from|to) (\S+)\s*$/) {
562		my $filename = $1;
563		push(@files, $filename);
564	    } elsif (m/^diff --git a\/(\S+) b\/(\S+)\s*$/) {
565		my $filename1 = $1;
566		my $filename2 = $2;
567		push(@files, $filename1);
568		push(@files, $filename2);
569	    } elsif (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
570		my $filename = $1;
571		$filename =~ s@^[^/]*/@@;
572		$filename =~ s@\n@@;
573		$lastfile = $filename;
574		push(@files, $filename);
575		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
576	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
577		if ($email_git_blame) {
578		    push(@range, "$lastfile:$1:$2");
579		}
580	    } elsif ($keywords) {
581		foreach my $line (keys %keyword_hash) {
582		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
583			push(@keyword_tvi, $line);
584		    }
585		}
586	    }
587	}
588	close($patch);
589
590	if ($file_cnt == @files) {
591	    warn "$P: file '${file}' doesn't appear to be a patch.  "
592		. "Add -f to options?\n";
593	}
594	@files = sort_and_uniq(@files);
595    }
596}
597
598@file_emails = uniq(@file_emails);
599
600my %email_hash_name;
601my %email_hash_address;
602my @email_to = ();
603my %hash_list_to;
604my @list_to = ();
605my @scm = ();
606my @web = ();
607my @subsystem = ();
608my @status = ();
609my %deduplicate_name_hash = ();
610my %deduplicate_address_hash = ();
611
612my @maintainers = get_maintainers();
613
614if (@maintainers) {
615    @maintainers = merge_email(@maintainers);
616    output(@maintainers);
617}
618
619if ($scm) {
620    @scm = uniq(@scm);
621    output(@scm);
622}
623
624if ($status) {
625    @status = uniq(@status);
626    output(@status);
627}
628
629if ($subsystem) {
630    @subsystem = uniq(@subsystem);
631    output(@subsystem);
632}
633
634if ($web) {
635    @web = uniq(@web);
636    output(@web);
637}
638
639exit($exit);
640
641sub self_test {
642    my @lsfiles = ();
643    my @good_links = ();
644    my @bad_links = ();
645    my @section_headers = ();
646    my $index = 0;
647
648    @lsfiles = vcs_list_files($lk_path);
649
650    for my $x (@self_test_info) {
651	$index++;
652
653	## Section header duplication and missing section content
654	if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
655	    $x->{line} =~ /^\S[^:]/ &&
656	    defined $self_test_info[$index] &&
657	    $self_test_info[$index]->{line} =~ /^([A-Z]):\s*\S/) {
658	    my $has_S = 0;
659	    my $has_F = 0;
660	    my $has_ML = 0;
661	    my $status = "";
662	    if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
663		print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n");
664	    } else {
665		push(@section_headers, $x->{line});
666	    }
667	    my $nextline = $index;
668	    while (defined $self_test_info[$nextline] &&
669		   $self_test_info[$nextline]->{line} =~ /^([A-Z]):\s*(\S.*)/) {
670		my $type = $1;
671		my $value = $2;
672		if ($type eq "S") {
673		    $has_S = 1;
674		    $status = $value;
675		} elsif ($type eq "F" || $type eq "N") {
676		    $has_F = 1;
677		} elsif ($type eq "M" || $type eq "R" || $type eq "L") {
678		    $has_ML = 1;
679		}
680		$nextline++;
681	    }
682	    if (!$has_ML && $status !~ /orphan|obsolete/i) {
683		print("$x->{file}:$x->{linenr}: warning: section without email address\t$x->{line}\n");
684	    }
685	    if (!$has_S) {
686		print("$x->{file}:$x->{linenr}: warning: section without status \t$x->{line}\n");
687	    }
688	    if (!$has_F) {
689		print("$x->{file}:$x->{linenr}: warning: section without file pattern\t$x->{line}\n");
690	    }
691	}
692
693	next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);
694
695	my $type = $1;
696	my $value = $2;
697
698	## Filename pattern matching
699	if (($type eq "F" || $type eq "X") &&
700	    ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
701	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
702	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
703	    $value =~ s/\?/\./g;         ##Convert ? to .
704	    ##if pattern is a directory and it lacks a trailing slash, add one
705	    if ((-d $value)) {
706		$value =~ s@([^/])$@$1/@;
707	    }
708	    if (!grep(m@^$value@, @lsfiles)) {
709		print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n");
710	    }
711
712	## Link reachability
713	} elsif (($type eq "W" || $type eq "Q" || $type eq "B") &&
714		 $value =~ /^https?:/ &&
715		 ($self_test eq "" || $self_test =~ /\blinks\b/)) {
716	    next if (grep(m@^\Q$value\E$@, @good_links));
717	    my $isbad = 0;
718	    if (grep(m@^\Q$value\E$@, @bad_links)) {
719	        $isbad = 1;
720	    } else {
721		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`;
722		if ($? == 0) {
723		    push(@good_links, $value);
724		} else {
725		    push(@bad_links, $value);
726		    $isbad = 1;
727		}
728	    }
729	    if ($isbad) {
730	        print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
731	    }
732
733	## SCM reachability
734	} elsif ($type eq "T" &&
735		 ($self_test eq "" || $self_test =~ /\bscm\b/)) {
736	    next if (grep(m@^\Q$value\E$@, @good_links));
737	    my $isbad = 0;
738	    if (grep(m@^\Q$value\E$@, @bad_links)) {
739	        $isbad = 1;
740            } elsif ($value !~ /^(?:git|quilt|hg)\s+\S/) {
741		print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n");
742	    } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {
743		my $url = $1;
744		my $branch = "";
745		$branch = $3 if $3;
746		my $output = `git ls-remote --exit-code -h "$url" $branch > /dev/null 2>&1`;
747		if ($? == 0) {
748		    push(@good_links, $value);
749		} else {
750		    push(@bad_links, $value);
751		    $isbad = 1;
752		}
753	    } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
754		my $url = $1;
755		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
756		if ($? == 0) {
757		    push(@good_links, $value);
758		} else {
759		    push(@bad_links, $value);
760		    $isbad = 1;
761		}
762	    }
763	    if ($isbad) {
764		print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
765	    }
766	}
767    }
768}
769
770sub ignore_email_address {
771    my ($address) = @_;
772
773    foreach my $ignore (@ignore_emails) {
774	return 1 if ($ignore eq $address);
775    }
776
777    return 0;
778}
779
780sub range_is_maintained {
781    my ($start, $end) = @_;
782
783    for (my $i = $start; $i < $end; $i++) {
784	my $line = $typevalue[$i];
785	if ($line =~ m/^([A-Z]):\s*(.*)/) {
786	    my $type = $1;
787	    my $value = $2;
788	    if ($type eq 'S') {
789		if ($value =~ /(maintain|support)/i) {
790		    return 1;
791		}
792	    }
793	}
794    }
795    return 0;
796}
797
798sub range_has_maintainer {
799    my ($start, $end) = @_;
800
801    for (my $i = $start; $i < $end; $i++) {
802	my $line = $typevalue[$i];
803	if ($line =~ m/^([A-Z]):\s*(.*)/) {
804	    my $type = $1;
805	    my $value = $2;
806	    if ($type eq 'M') {
807		return 1;
808	    }
809	}
810    }
811    return 0;
812}
813
814sub get_maintainers {
815    %email_hash_name = ();
816    %email_hash_address = ();
817    %commit_author_hash = ();
818    %commit_signer_hash = ();
819    @email_to = ();
820    %hash_list_to = ();
821    @list_to = ();
822    @scm = ();
823    @web = ();
824    @subsystem = ();
825    @status = ();
826    %deduplicate_name_hash = ();
827    %deduplicate_address_hash = ();
828    if ($email_git_all_signature_types) {
829	$signature_pattern = "(.+?)[Bb][Yy]:";
830    } else {
831	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
832    }
833
834    # Find responsible parties
835
836    my %exact_pattern_match_hash = ();
837
838    foreach my $file (@files) {
839
840	my %hash;
841	my $tvi = find_first_section();
842	while ($tvi < @typevalue) {
843	    my $start = find_starting_index($tvi);
844	    my $end = find_ending_index($tvi);
845	    my $exclude = 0;
846	    my $i;
847
848	    #Do not match excluded file patterns
849
850	    for ($i = $start; $i < $end; $i++) {
851		my $line = $typevalue[$i];
852		if ($line =~ m/^([A-Z]):\s*(.*)/) {
853		    my $type = $1;
854		    my $value = $2;
855		    if ($type eq 'X') {
856			if (file_match_pattern($file, $value)) {
857			    $exclude = 1;
858			    last;
859			}
860		    }
861		}
862	    }
863
864	    if (!$exclude) {
865		for ($i = $start; $i < $end; $i++) {
866		    my $line = $typevalue[$i];
867		    if ($line =~ m/^([A-Z]):\s*(.*)/) {
868			my $type = $1;
869			my $value = $2;
870			if ($type eq 'F') {
871			    if (file_match_pattern($file, $value)) {
872				my $value_pd = ($value =~ tr@/@@);
873				my $file_pd = ($file  =~ tr@/@@);
874				$value_pd++ if (substr($value,-1,1) ne "/");
875				$value_pd = -1 if ($value =~ /^\.\*/);
876				if ($value_pd >= $file_pd &&
877				    range_is_maintained($start, $end) &&
878				    range_has_maintainer($start, $end)) {
879				    $exact_pattern_match_hash{$file} = 1;
880				}
881				if ($pattern_depth == 0 ||
882				    (($file_pd - $value_pd) < $pattern_depth)) {
883				    $hash{$tvi} = $value_pd;
884				}
885			    }
886			} elsif ($type eq 'N') {
887			    if ($file =~ m/$value/x) {
888				$hash{$tvi} = 0;
889			    }
890			}
891		    }
892		}
893	    }
894	    $tvi = $end + 1;
895	}
896
897	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
898	    add_categories($line);
899	    if ($sections) {
900		my $i;
901		my $start = find_starting_index($line);
902		my $end = find_ending_index($line);
903		for ($i = $start; $i < $end; $i++) {
904		    my $line = $typevalue[$i];
905		    if ($line =~ /^[FX]:/) {		##Restore file patterns
906			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
907			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
908			$line =~ s/\\\./\./g;       	##Convert \. to .
909			$line =~ s/\.\*/\*/g;       	##Convert .* to *
910		    }
911		    my $count = $line =~ s/^([A-Z]):/$1:\t/g;
912		    if ($letters eq "" || (!$count || $letters =~ /$1/i)) {
913			print("$line\n");
914		    }
915		}
916		print("\n");
917	    }
918	}
919    }
920
921    if ($keywords) {
922	@keyword_tvi = sort_and_uniq(@keyword_tvi);
923	foreach my $line (@keyword_tvi) {
924	    add_categories($line);
925	}
926    }
927
928    foreach my $email (@email_to, @list_to) {
929	$email->[0] = deduplicate_email($email->[0]);
930    }
931
932    foreach my $file (@files) {
933	if ($email &&
934	    ($email_git || ($email_git_fallback &&
935			    !$exact_pattern_match_hash{$file}))) {
936	    vcs_file_signoffs($file);
937	}
938	if ($email && $email_git_blame) {
939	    vcs_file_blame($file);
940	}
941    }
942
943    if ($email) {
944	foreach my $chief (@penguin_chief) {
945	    if ($chief =~ m/^(.*):(.*)/) {
946		my $email_address;
947
948		$email_address = format_email($1, $2, $email_usename);
949		if ($email_git_penguin_chiefs) {
950		    push(@email_to, [$email_address, 'chief penguin']);
951		} else {
952		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
953		}
954	    }
955	}
956
957	foreach my $email (@file_emails) {
958	    my ($name, $address) = parse_email($email);
959
960	    my $tmp_email = format_email($name, $address, $email_usename);
961	    push_email_address($tmp_email, '');
962	    add_role($tmp_email, 'in file');
963	}
964    }
965
966    my @to = ();
967    if ($email || $email_list) {
968	if ($email) {
969	    @to = (@to, @email_to);
970	}
971	if ($email_list) {
972	    @to = (@to, @list_to);
973	}
974    }
975
976    if ($interactive) {
977	@to = interactive_get_maintainers(\@to);
978    }
979
980    return @to;
981}
982
983sub file_match_pattern {
984    my ($file, $pattern) = @_;
985    if (substr($pattern, -1) eq "/") {
986	if ($file =~ m@^$pattern@) {
987	    return 1;
988	}
989    } else {
990	if ($file =~ m@^$pattern@) {
991	    my $s1 = ($file =~ tr@/@@);
992	    my $s2 = ($pattern =~ tr@/@@);
993	    if ($s1 == $s2) {
994		return 1;
995	    }
996	}
997    }
998    return 0;
999}
1000
1001sub usage {
1002    print <<EOT;
1003usage: $P [options] patchfile
1004       $P [options] -f file|directory
1005version: $V
1006
1007MAINTAINER field selection options:
1008  --email => print email address(es) if any
1009    --git => include recent git \*-by: signers
1010    --git-all-signature-types => include signers regardless of signature type
1011        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
1012    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
1013    --git-chief-penguins => include ${penguin_chiefs}
1014    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
1015    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
1016    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
1017    --git-blame => use git blame to find modified commits for patch or file
1018    --git-blame-signatures => when used with --git-blame, also include all commit signers
1019    --git-since => git history to use (default: $email_git_since)
1020    --hg-since => hg history to use (default: $email_hg_since)
1021    --interactive => display a menu (mostly useful if used with the --git option)
1022    --m => include maintainer(s) if any
1023    --r => include reviewer(s) if any
1024    --n => include name 'Full Name <addr\@domain.tld>'
1025    --l => include list(s) if any
1026    --s => include subscriber only list(s) if any
1027    --remove-duplicates => minimize duplicate email names/addresses
1028    --roles => show roles (status:subsystem, git-signer, list, etc...)
1029    --rolestats => show roles and statistics (commits/total_commits, %)
1030    --file-emails => add email addresses found in -f file (default: 0 (off))
1031  --scm => print SCM tree(s) if any
1032  --status => print status if any
1033  --subsystem => print subsystem name if any
1034  --web => print website(s) if any
1035
1036Output type options:
1037  --separator [, ] => separator for multiple entries on 1 line
1038    using --separator also sets --nomultiline if --separator is not [, ]
1039  --multiline => print 1 entry per line
1040
1041Other options:
1042  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
1043  --keywords => scan patch for keywords (default: $keywords)
1044  --sections => print all of the subsystem sections with pattern matches
1045  --letters => print all matching 'letter' types from all matching sections
1046  --mailmap => use .mailmap file (default: $email_use_mailmap)
1047  --no-tree => run without a kernel tree
1048  --self-test => show potential issues with MAINTAINERS file content
1049  --version => show version
1050  --help => show this help information
1051
1052Default options:
1053  [--email --tree --nogit --git-fallback --m --r --n --l --multiline
1054   --pattern-depth=0 --remove-duplicates --rolestats]
1055
1056Notes:
1057  Using "-f directory" may give unexpected results:
1058      Used with "--git", git signators for _all_ files in and below
1059          directory are examined as git recurses directories.
1060          Any specified X: (exclude) pattern matches are _not_ ignored.
1061      Used with "--nogit", directory is used as a pattern match,
1062          no individual file within the directory or subdirectory
1063          is matched.
1064      Used with "--git-blame", does not iterate all files in directory
1065  Using "--git-blame" is slow and may add old committers and authors
1066      that are no longer active maintainers to the output.
1067  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
1068      other automated tools that expect only ["name"] <email address>
1069      may not work because of additional output after <email address>.
1070  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
1071      not the percentage of the entire file authored.  # of commits is
1072      not a good measure of amount of code authored.  1 major commit may
1073      contain a thousand lines, 5 trivial commits may modify a single line.
1074  If git is not installed, but mercurial (hg) is installed and an .hg
1075      repository exists, the following options apply to mercurial:
1076          --git,
1077          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
1078          --git-blame
1079      Use --hg-since not --git-since to control date selection
1080  File ".get_maintainer.conf", if it exists in the linux kernel source root
1081      directory, can change whatever get_maintainer defaults are desired.
1082      Entries in this file can be any command line argument.
1083      This file is prepended to any additional command line arguments.
1084      Multiple lines and # comments are allowed.
1085  Most options have both positive and negative forms.
1086      The negative forms for --<foo> are --no<foo> and --no-<foo>.
1087
1088EOT
1089}
1090
1091sub top_of_kernel_tree {
1092    my ($lk_path) = @_;
1093
1094    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
1095	$lk_path .= "/";
1096    }
1097    if (   (-f "${lk_path}COPYING")
1098	&& (-f "${lk_path}CREDITS")
1099	&& (-f "${lk_path}Kbuild")
1100	&& (-e "${lk_path}MAINTAINERS")
1101	&& (-f "${lk_path}Makefile")
1102	&& (-f "${lk_path}README")
1103	&& (-d "${lk_path}Documentation")
1104	&& (-d "${lk_path}arch")
1105	&& (-d "${lk_path}include")
1106	&& (-d "${lk_path}drivers")
1107	&& (-d "${lk_path}fs")
1108	&& (-d "${lk_path}init")
1109	&& (-d "${lk_path}ipc")
1110	&& (-d "${lk_path}kernel")
1111	&& (-d "${lk_path}lib")
1112	&& (-d "${lk_path}scripts")) {
1113	return 1;
1114    }
1115    return 0;
1116}
1117
1118sub parse_email {
1119    my ($formatted_email) = @_;
1120
1121    my $name = "";
1122    my $address = "";
1123
1124    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
1125	$name = $1;
1126	$address = $2;
1127    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
1128	$address = $1;
1129    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
1130	$address = $1;
1131    }
1132
1133    $name =~ s/^\s+|\s+$//g;
1134    $name =~ s/^\"|\"$//g;
1135    $address =~ s/^\s+|\s+$//g;
1136
1137    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
1138	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1139	$name = "\"$name\"";
1140    }
1141
1142    return ($name, $address);
1143}
1144
1145sub format_email {
1146    my ($name, $address, $usename) = @_;
1147
1148    my $formatted_email;
1149
1150    $name =~ s/^\s+|\s+$//g;
1151    $name =~ s/^\"|\"$//g;
1152    $address =~ s/^\s+|\s+$//g;
1153
1154    if ($name =~ /[^\w \-]/i) {          ##has "must quote" chars
1155	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
1156	$name = "\"$name\"";
1157    }
1158
1159    if ($usename) {
1160	if ("$name" eq "") {
1161	    $formatted_email = "$address";
1162	} else {
1163	    $formatted_email = "$name <$address>";
1164	}
1165    } else {
1166	$formatted_email = $address;
1167    }
1168
1169    return $formatted_email;
1170}
1171
1172sub find_first_section {
1173    my $index = 0;
1174
1175    while ($index < @typevalue) {
1176	my $tv = $typevalue[$index];
1177	if (($tv =~ m/^([A-Z]):\s*(.*)/)) {
1178	    last;
1179	}
1180	$index++;
1181    }
1182
1183    return $index;
1184}
1185
1186sub find_starting_index {
1187    my ($index) = @_;
1188
1189    while ($index > 0) {
1190	my $tv = $typevalue[$index];
1191	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1192	    last;
1193	}
1194	$index--;
1195    }
1196
1197    return $index;
1198}
1199
1200sub find_ending_index {
1201    my ($index) = @_;
1202
1203    while ($index < @typevalue) {
1204	my $tv = $typevalue[$index];
1205	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
1206	    last;
1207	}
1208	$index++;
1209    }
1210
1211    return $index;
1212}
1213
1214sub get_subsystem_name {
1215    my ($index) = @_;
1216
1217    my $start = find_starting_index($index);
1218
1219    my $subsystem = $typevalue[$start];
1220    if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
1221	$subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
1222	$subsystem =~ s/\s*$//;
1223	$subsystem = $subsystem . "...";
1224    }
1225    return $subsystem;
1226}
1227
1228sub get_maintainer_role {
1229    my ($index) = @_;
1230
1231    my $i;
1232    my $start = find_starting_index($index);
1233    my $end = find_ending_index($index);
1234
1235    my $role = "unknown";
1236    my $subsystem = get_subsystem_name($index);
1237
1238    for ($i = $start + 1; $i < $end; $i++) {
1239	my $tv = $typevalue[$i];
1240	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1241	    my $ptype = $1;
1242	    my $pvalue = $2;
1243	    if ($ptype eq "S") {
1244		$role = $pvalue;
1245	    }
1246	}
1247    }
1248
1249    $role = lc($role);
1250    if      ($role eq "supported") {
1251	$role = "supporter";
1252    } elsif ($role eq "maintained") {
1253	$role = "maintainer";
1254    } elsif ($role eq "odd fixes") {
1255	$role = "odd fixer";
1256    } elsif ($role eq "orphan") {
1257	$role = "orphan minder";
1258    } elsif ($role eq "obsolete") {
1259	$role = "obsolete minder";
1260    } elsif ($role eq "buried alive in reporters") {
1261	$role = "chief penguin";
1262    }
1263
1264    return $role . ":" . $subsystem;
1265}
1266
1267sub get_list_role {
1268    my ($index) = @_;
1269
1270    my $subsystem = get_subsystem_name($index);
1271
1272    if ($subsystem eq "THE REST") {
1273	$subsystem = "";
1274    }
1275
1276    return $subsystem;
1277}
1278
1279sub add_categories {
1280    my ($index) = @_;
1281
1282    my $i;
1283    my $start = find_starting_index($index);
1284    my $end = find_ending_index($index);
1285
1286    push(@subsystem, $typevalue[$start]);
1287
1288    for ($i = $start + 1; $i < $end; $i++) {
1289	my $tv = $typevalue[$i];
1290	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1291	    my $ptype = $1;
1292	    my $pvalue = $2;
1293	    if ($ptype eq "L") {
1294		my $list_address = $pvalue;
1295		my $list_additional = "";
1296		my $list_role = get_list_role($i);
1297
1298		if ($list_role ne "") {
1299		    $list_role = ":" . $list_role;
1300		}
1301		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1302		    $list_address = $1;
1303		    $list_additional = $2;
1304		}
1305		if ($list_additional =~ m/subscribers-only/) {
1306		    if ($email_subscriber_list) {
1307			if (!$hash_list_to{lc($list_address)}) {
1308			    $hash_list_to{lc($list_address)} = 1;
1309			    push(@list_to, [$list_address,
1310					    "subscriber list${list_role}"]);
1311			}
1312		    }
1313		} else {
1314		    if ($email_list) {
1315			if (!$hash_list_to{lc($list_address)}) {
1316			    $hash_list_to{lc($list_address)} = 1;
1317			    if ($list_additional =~ m/moderated/) {
1318				push(@list_to, [$list_address,
1319						"moderated list${list_role}"]);
1320			    } else {
1321				push(@list_to, [$list_address,
1322						"open list${list_role}"]);
1323			    }
1324			}
1325		    }
1326		}
1327	    } elsif ($ptype eq "M") {
1328		my ($name, $address) = parse_email($pvalue);
1329		if ($name eq "") {
1330		    if ($i > 0) {
1331			my $tv = $typevalue[$i - 1];
1332			if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1333			    if ($1 eq "P") {
1334				$name = $2;
1335				$pvalue = format_email($name, $address, $email_usename);
1336			    }
1337			}
1338		    }
1339		}
1340		if ($email_maintainer) {
1341		    my $role = get_maintainer_role($i);
1342		    push_email_addresses($pvalue, $role);
1343		}
1344	    } elsif ($ptype eq "R") {
1345		my ($name, $address) = parse_email($pvalue);
1346		if ($name eq "") {
1347		    if ($i > 0) {
1348			my $tv = $typevalue[$i - 1];
1349			if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1350			    if ($1 eq "P") {
1351				$name = $2;
1352				$pvalue = format_email($name, $address, $email_usename);
1353			    }
1354			}
1355		    }
1356		}
1357		if ($email_reviewer) {
1358		    my $subsystem = get_subsystem_name($i);
1359		    push_email_addresses($pvalue, "reviewer:$subsystem");
1360		}
1361	    } elsif ($ptype eq "T") {
1362		push(@scm, $pvalue);
1363	    } elsif ($ptype eq "W") {
1364		push(@web, $pvalue);
1365	    } elsif ($ptype eq "S") {
1366		push(@status, $pvalue);
1367	    }
1368	}
1369    }
1370}
1371
1372sub email_inuse {
1373    my ($name, $address) = @_;
1374
1375    return 1 if (($name eq "") && ($address eq ""));
1376    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1377    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1378
1379    return 0;
1380}
1381
1382sub push_email_address {
1383    my ($line, $role) = @_;
1384
1385    my ($name, $address) = parse_email($line);
1386
1387    if ($address eq "") {
1388	return 0;
1389    }
1390
1391    if (!$email_remove_duplicates) {
1392	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1393    } elsif (!email_inuse($name, $address)) {
1394	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1395	$email_hash_name{lc($name)}++ if ($name ne "");
1396	$email_hash_address{lc($address)}++;
1397    }
1398
1399    return 1;
1400}
1401
1402sub push_email_addresses {
1403    my ($address, $role) = @_;
1404
1405    my @address_list = ();
1406
1407    if (rfc822_valid($address)) {
1408	push_email_address($address, $role);
1409    } elsif (@address_list = rfc822_validlist($address)) {
1410	my $array_count = shift(@address_list);
1411	while (my $entry = shift(@address_list)) {
1412	    push_email_address($entry, $role);
1413	}
1414    } else {
1415	if (!push_email_address($address, $role)) {
1416	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1417	}
1418    }
1419}
1420
1421sub add_role {
1422    my ($line, $role) = @_;
1423
1424    my ($name, $address) = parse_email($line);
1425    my $email = format_email($name, $address, $email_usename);
1426
1427    foreach my $entry (@email_to) {
1428	if ($email_remove_duplicates) {
1429	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1430	    if (($name eq $entry_name || $address eq $entry_address)
1431		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1432	    ) {
1433		if ($entry->[1] eq "") {
1434		    $entry->[1] = "$role";
1435		} else {
1436		    $entry->[1] = "$entry->[1],$role";
1437		}
1438	    }
1439	} else {
1440	    if ($email eq $entry->[0]
1441		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1442	    ) {
1443		if ($entry->[1] eq "") {
1444		    $entry->[1] = "$role";
1445		} else {
1446		    $entry->[1] = "$entry->[1],$role";
1447		}
1448	    }
1449	}
1450    }
1451}
1452
1453sub which {
1454    my ($bin) = @_;
1455
1456    foreach my $path (split(/:/, $ENV{PATH})) {
1457	if (-e "$path/$bin") {
1458	    return "$path/$bin";
1459	}
1460    }
1461
1462    return "";
1463}
1464
1465sub which_conf {
1466    my ($conf) = @_;
1467
1468    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1469	if (-e "$path/$conf") {
1470	    return "$path/$conf";
1471	}
1472    }
1473
1474    return "";
1475}
1476
1477sub mailmap_email {
1478    my ($line) = @_;
1479
1480    my ($name, $address) = parse_email($line);
1481    my $email = format_email($name, $address, 1);
1482    my $real_name = $name;
1483    my $real_address = $address;
1484
1485    if (exists $mailmap->{names}->{$email} ||
1486	exists $mailmap->{addresses}->{$email}) {
1487	if (exists $mailmap->{names}->{$email}) {
1488	    $real_name = $mailmap->{names}->{$email};
1489	}
1490	if (exists $mailmap->{addresses}->{$email}) {
1491	    $real_address = $mailmap->{addresses}->{$email};
1492	}
1493    } else {
1494	if (exists $mailmap->{names}->{$address}) {
1495	    $real_name = $mailmap->{names}->{$address};
1496	}
1497	if (exists $mailmap->{addresses}->{$address}) {
1498	    $real_address = $mailmap->{addresses}->{$address};
1499	}
1500    }
1501    return format_email($real_name, $real_address, 1);
1502}
1503
1504sub mailmap {
1505    my (@addresses) = @_;
1506
1507    my @mapped_emails = ();
1508    foreach my $line (@addresses) {
1509	push(@mapped_emails, mailmap_email($line));
1510    }
1511    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1512    return @mapped_emails;
1513}
1514
1515sub merge_by_realname {
1516    my %address_map;
1517    my (@emails) = @_;
1518
1519    foreach my $email (@emails) {
1520	my ($name, $address) = parse_email($email);
1521	if (exists $address_map{$name}) {
1522	    $address = $address_map{$name};
1523	    $email = format_email($name, $address, 1);
1524	} else {
1525	    $address_map{$name} = $address;
1526	}
1527    }
1528}
1529
1530sub git_execute_cmd {
1531    my ($cmd) = @_;
1532    my @lines = ();
1533
1534    my $output = `$cmd`;
1535    $output =~ s/^\s*//gm;
1536    @lines = split("\n", $output);
1537
1538    return @lines;
1539}
1540
1541sub hg_execute_cmd {
1542    my ($cmd) = @_;
1543    my @lines = ();
1544
1545    my $output = `$cmd`;
1546    @lines = split("\n", $output);
1547
1548    return @lines;
1549}
1550
1551sub extract_formatted_signatures {
1552    my (@signature_lines) = @_;
1553
1554    my @type = @signature_lines;
1555
1556    s/\s*(.*):.*/$1/ for (@type);
1557
1558    # cut -f2- -d":"
1559    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1560
1561## Reformat email addresses (with names) to avoid badly written signatures
1562
1563    foreach my $signer (@signature_lines) {
1564	$signer = deduplicate_email($signer);
1565    }
1566
1567    return (\@type, \@signature_lines);
1568}
1569
1570sub vcs_find_signers {
1571    my ($cmd, $file) = @_;
1572    my $commits;
1573    my @lines = ();
1574    my @signatures = ();
1575    my @authors = ();
1576    my @stats = ();
1577
1578    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1579
1580    my $pattern = $VCS_cmds{"commit_pattern"};
1581    my $author_pattern = $VCS_cmds{"author_pattern"};
1582    my $stat_pattern = $VCS_cmds{"stat_pattern"};
1583
1584    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern
1585
1586    $commits = grep(/$pattern/, @lines);	# of commits
1587
1588    @authors = grep(/$author_pattern/, @lines);
1589    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1590    @stats = grep(/$stat_pattern/, @lines);
1591
1592#    print("stats: <@stats>\n");
1593
1594    return (0, \@signatures, \@authors, \@stats) if !@signatures;
1595
1596    save_commits_by_author(@lines) if ($interactive);
1597    save_commits_by_signer(@lines) if ($interactive);
1598
1599    if (!$email_git_penguin_chiefs) {
1600	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1601    }
1602
1603    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
1604    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1605
1606    return ($commits, $signers_ref, $authors_ref, \@stats);
1607}
1608
1609sub vcs_find_author {
1610    my ($cmd) = @_;
1611    my @lines = ();
1612
1613    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1614
1615    if (!$email_git_penguin_chiefs) {
1616	@lines = grep(!/${penguin_chiefs}/i, @lines);
1617    }
1618
1619    return @lines if !@lines;
1620
1621    my @authors = ();
1622    foreach my $line (@lines) {
1623	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1624	    my $author = $1;
1625	    my ($name, $address) = parse_email($author);
1626	    $author = format_email($name, $address, 1);
1627	    push(@authors, $author);
1628	}
1629    }
1630
1631    save_commits_by_author(@lines) if ($interactive);
1632    save_commits_by_signer(@lines) if ($interactive);
1633
1634    return @authors;
1635}
1636
1637sub vcs_save_commits {
1638    my ($cmd) = @_;
1639    my @lines = ();
1640    my @commits = ();
1641
1642    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1643
1644    foreach my $line (@lines) {
1645	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1646	    push(@commits, $1);
1647	}
1648    }
1649
1650    return @commits;
1651}
1652
1653sub vcs_blame {
1654    my ($file) = @_;
1655    my $cmd;
1656    my @commits = ();
1657
1658    return @commits if (!(-f $file));
1659
1660    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1661	my @all_commits = ();
1662
1663	$cmd = $VCS_cmds{"blame_file_cmd"};
1664	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1665	@all_commits = vcs_save_commits($cmd);
1666
1667	foreach my $file_range_diff (@range) {
1668	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1669	    my $diff_file = $1;
1670	    my $diff_start = $2;
1671	    my $diff_length = $3;
1672	    next if ("$file" ne "$diff_file");
1673	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1674		push(@commits, $all_commits[$i]);
1675	    }
1676	}
1677    } elsif (@range) {
1678	foreach my $file_range_diff (@range) {
1679	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1680	    my $diff_file = $1;
1681	    my $diff_start = $2;
1682	    my $diff_length = $3;
1683	    next if ("$file" ne "$diff_file");
1684	    $cmd = $VCS_cmds{"blame_range_cmd"};
1685	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1686	    push(@commits, vcs_save_commits($cmd));
1687	}
1688    } else {
1689	$cmd = $VCS_cmds{"blame_file_cmd"};
1690	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1691	@commits = vcs_save_commits($cmd);
1692    }
1693
1694    foreach my $commit (@commits) {
1695	$commit =~ s/^\^//g;
1696    }
1697
1698    return @commits;
1699}
1700
1701my $printed_novcs = 0;
1702sub vcs_exists {
1703    %VCS_cmds = %VCS_cmds_git;
1704    return 1 if eval $VCS_cmds{"available"};
1705    %VCS_cmds = %VCS_cmds_hg;
1706    return 2 if eval $VCS_cmds{"available"};
1707    %VCS_cmds = ();
1708    if (!$printed_novcs) {
1709	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1710	warn("Using a git repository produces better results.\n");
1711	warn("Try Linus Torvalds' latest git repository using:\n");
1712	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
1713	$printed_novcs = 1;
1714    }
1715    return 0;
1716}
1717
1718sub vcs_is_git {
1719    vcs_exists();
1720    return $vcs_used == 1;
1721}
1722
1723sub vcs_is_hg {
1724    return $vcs_used == 2;
1725}
1726
1727sub interactive_get_maintainers {
1728    my ($list_ref) = @_;
1729    my @list = @$list_ref;
1730
1731    vcs_exists();
1732
1733    my %selected;
1734    my %authored;
1735    my %signed;
1736    my $count = 0;
1737    my $maintained = 0;
1738    foreach my $entry (@list) {
1739	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1740	$selected{$count} = 1;
1741	$authored{$count} = 0;
1742	$signed{$count} = 0;
1743	$count++;
1744    }
1745
1746    #menu loop
1747    my $done = 0;
1748    my $print_options = 0;
1749    my $redraw = 1;
1750    while (!$done) {
1751	$count = 0;
1752	if ($redraw) {
1753	    printf STDERR "\n%1s %2s %-65s",
1754			  "*", "#", "email/list and role:stats";
1755	    if ($email_git ||
1756		($email_git_fallback && !$maintained) ||
1757		$email_git_blame) {
1758		print STDERR "auth sign";
1759	    }
1760	    print STDERR "\n";
1761	    foreach my $entry (@list) {
1762		my $email = $entry->[0];
1763		my $role = $entry->[1];
1764		my $sel = "";
1765		$sel = "*" if ($selected{$count});
1766		my $commit_author = $commit_author_hash{$email};
1767		my $commit_signer = $commit_signer_hash{$email};
1768		my $authored = 0;
1769		my $signed = 0;
1770		$authored++ for (@{$commit_author});
1771		$signed++ for (@{$commit_signer});
1772		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1773		printf STDERR "%4d %4d", $authored, $signed
1774		    if ($authored > 0 || $signed > 0);
1775		printf STDERR "\n     %s\n", $role;
1776		if ($authored{$count}) {
1777		    my $commit_author = $commit_author_hash{$email};
1778		    foreach my $ref (@{$commit_author}) {
1779			print STDERR "     Author: @{$ref}[1]\n";
1780		    }
1781		}
1782		if ($signed{$count}) {
1783		    my $commit_signer = $commit_signer_hash{$email};
1784		    foreach my $ref (@{$commit_signer}) {
1785			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1786		    }
1787		}
1788
1789		$count++;
1790	    }
1791	}
1792	my $date_ref = \$email_git_since;
1793	$date_ref = \$email_hg_since if (vcs_is_hg());
1794	if ($print_options) {
1795	    $print_options = 0;
1796	    if (vcs_exists()) {
1797		print STDERR <<EOT
1798
1799Version Control options:
1800g  use git history      [$email_git]
1801gf use git-fallback     [$email_git_fallback]
1802b  use git blame        [$email_git_blame]
1803bs use blame signatures [$email_git_blame_signatures]
1804c# minimum commits      [$email_git_min_signatures]
1805%# min percent          [$email_git_min_percent]
1806d# history to use       [$$date_ref]
1807x# max maintainers      [$email_git_max_maintainers]
1808t  all signature types  [$email_git_all_signature_types]
1809m  use .mailmap         [$email_use_mailmap]
1810EOT
1811	    }
1812	    print STDERR <<EOT
1813
1814Additional options:
18150  toggle all
1816tm toggle maintainers
1817tg toggle git entries
1818tl toggle open list entries
1819ts toggle subscriber list entries
1820f  emails in file       [$file_emails]
1821k  keywords in file     [$keywords]
1822r  remove duplicates    [$email_remove_duplicates]
1823p# pattern match depth  [$pattern_depth]
1824EOT
1825	}
1826	print STDERR
1827"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1828
1829	my $input = <STDIN>;
1830	chomp($input);
1831
1832	$redraw = 1;
1833	my $rerun = 0;
1834	my @wish = split(/[, ]+/, $input);
1835	foreach my $nr (@wish) {
1836	    $nr = lc($nr);
1837	    my $sel = substr($nr, 0, 1);
1838	    my $str = substr($nr, 1);
1839	    my $val = 0;
1840	    $val = $1 if $str =~ /^(\d+)$/;
1841
1842	    if ($sel eq "y") {
1843		$interactive = 0;
1844		$done = 1;
1845		$output_rolestats = 0;
1846		$output_roles = 0;
1847		last;
1848	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1849		$selected{$nr - 1} = !$selected{$nr - 1};
1850	    } elsif ($sel eq "*" || $sel eq '^') {
1851		my $toggle = 0;
1852		$toggle = 1 if ($sel eq '*');
1853		for (my $i = 0; $i < $count; $i++) {
1854		    $selected{$i} = $toggle;
1855		}
1856	    } elsif ($sel eq "0") {
1857		for (my $i = 0; $i < $count; $i++) {
1858		    $selected{$i} = !$selected{$i};
1859		}
1860	    } elsif ($sel eq "t") {
1861		if (lc($str) eq "m") {
1862		    for (my $i = 0; $i < $count; $i++) {
1863			$selected{$i} = !$selected{$i}
1864			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1865		    }
1866		} elsif (lc($str) eq "g") {
1867		    for (my $i = 0; $i < $count; $i++) {
1868			$selected{$i} = !$selected{$i}
1869			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1870		    }
1871		} elsif (lc($str) eq "l") {
1872		    for (my $i = 0; $i < $count; $i++) {
1873			$selected{$i} = !$selected{$i}
1874			    if ($list[$i]->[1] =~ /^(open list)/i);
1875		    }
1876		} elsif (lc($str) eq "s") {
1877		    for (my $i = 0; $i < $count; $i++) {
1878			$selected{$i} = !$selected{$i}
1879			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1880		    }
1881		}
1882	    } elsif ($sel eq "a") {
1883		if ($val > 0 && $val <= $count) {
1884		    $authored{$val - 1} = !$authored{$val - 1};
1885		} elsif ($str eq '*' || $str eq '^') {
1886		    my $toggle = 0;
1887		    $toggle = 1 if ($str eq '*');
1888		    for (my $i = 0; $i < $count; $i++) {
1889			$authored{$i} = $toggle;
1890		    }
1891		}
1892	    } elsif ($sel eq "s") {
1893		if ($val > 0 && $val <= $count) {
1894		    $signed{$val - 1} = !$signed{$val - 1};
1895		} elsif ($str eq '*' || $str eq '^') {
1896		    my $toggle = 0;
1897		    $toggle = 1 if ($str eq '*');
1898		    for (my $i = 0; $i < $count; $i++) {
1899			$signed{$i} = $toggle;
1900		    }
1901		}
1902	    } elsif ($sel eq "o") {
1903		$print_options = 1;
1904		$redraw = 1;
1905	    } elsif ($sel eq "g") {
1906		if ($str eq "f") {
1907		    bool_invert(\$email_git_fallback);
1908		} else {
1909		    bool_invert(\$email_git);
1910		}
1911		$rerun = 1;
1912	    } elsif ($sel eq "b") {
1913		if ($str eq "s") {
1914		    bool_invert(\$email_git_blame_signatures);
1915		} else {
1916		    bool_invert(\$email_git_blame);
1917		}
1918		$rerun = 1;
1919	    } elsif ($sel eq "c") {
1920		if ($val > 0) {
1921		    $email_git_min_signatures = $val;
1922		    $rerun = 1;
1923		}
1924	    } elsif ($sel eq "x") {
1925		if ($val > 0) {
1926		    $email_git_max_maintainers = $val;
1927		    $rerun = 1;
1928		}
1929	    } elsif ($sel eq "%") {
1930		if ($str ne "" && $val >= 0) {
1931		    $email_git_min_percent = $val;
1932		    $rerun = 1;
1933		}
1934	    } elsif ($sel eq "d") {
1935		if (vcs_is_git()) {
1936		    $email_git_since = $str;
1937		} elsif (vcs_is_hg()) {
1938		    $email_hg_since = $str;
1939		}
1940		$rerun = 1;
1941	    } elsif ($sel eq "t") {
1942		bool_invert(\$email_git_all_signature_types);
1943		$rerun = 1;
1944	    } elsif ($sel eq "f") {
1945		bool_invert(\$file_emails);
1946		$rerun = 1;
1947	    } elsif ($sel eq "r") {
1948		bool_invert(\$email_remove_duplicates);
1949		$rerun = 1;
1950	    } elsif ($sel eq "m") {
1951		bool_invert(\$email_use_mailmap);
1952		read_mailmap();
1953		$rerun = 1;
1954	    } elsif ($sel eq "k") {
1955		bool_invert(\$keywords);
1956		$rerun = 1;
1957	    } elsif ($sel eq "p") {
1958		if ($str ne "" && $val >= 0) {
1959		    $pattern_depth = $val;
1960		    $rerun = 1;
1961		}
1962	    } elsif ($sel eq "h" || $sel eq "?") {
1963		print STDERR <<EOT
1964
1965Interactive mode allows you to select the various maintainers, submitters,
1966commit signers and mailing lists that could be CC'd on a patch.
1967
1968Any *'d entry is selected.
1969
1970If you have git or hg installed, you can choose to summarize the commit
1971history of files in the patch.  Also, each line of the current file can
1972be matched to its commit author and that commits signers with blame.
1973
1974Various knobs exist to control the length of time for active commit
1975tracking, the maximum number of commit authors and signers to add,
1976and such.
1977
1978Enter selections at the prompt until you are satisfied that the selected
1979maintainers are appropriate.  You may enter multiple selections separated
1980by either commas or spaces.
1981
1982EOT
1983	    } else {
1984		print STDERR "invalid option: '$nr'\n";
1985		$redraw = 0;
1986	    }
1987	}
1988	if ($rerun) {
1989	    print STDERR "git-blame can be very slow, please have patience..."
1990		if ($email_git_blame);
1991	    goto &get_maintainers;
1992	}
1993    }
1994
1995    #drop not selected entries
1996    $count = 0;
1997    my @new_emailto = ();
1998    foreach my $entry (@list) {
1999	if ($selected{$count}) {
2000	    push(@new_emailto, $list[$count]);
2001	}
2002	$count++;
2003    }
2004    return @new_emailto;
2005}
2006
2007sub bool_invert {
2008    my ($bool_ref) = @_;
2009
2010    if ($$bool_ref) {
2011	$$bool_ref = 0;
2012    } else {
2013	$$bool_ref = 1;
2014    }
2015}
2016
2017sub deduplicate_email {
2018    my ($email) = @_;
2019
2020    my $matched = 0;
2021    my ($name, $address) = parse_email($email);
2022    $email = format_email($name, $address, 1);
2023    $email = mailmap_email($email);
2024
2025    return $email if (!$email_remove_duplicates);
2026
2027    ($name, $address) = parse_email($email);
2028
2029    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
2030	$name = $deduplicate_name_hash{lc($name)}->[0];
2031	$address = $deduplicate_name_hash{lc($name)}->[1];
2032	$matched = 1;
2033    } elsif ($deduplicate_address_hash{lc($address)}) {
2034	$name = $deduplicate_address_hash{lc($address)}->[0];
2035	$address = $deduplicate_address_hash{lc($address)}->[1];
2036	$matched = 1;
2037    }
2038    if (!$matched) {
2039	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
2040	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
2041    }
2042    $email = format_email($name, $address, 1);
2043    $email = mailmap_email($email);
2044    return $email;
2045}
2046
2047sub save_commits_by_author {
2048    my (@lines) = @_;
2049
2050    my @authors = ();
2051    my @commits = ();
2052    my @subjects = ();
2053
2054    foreach my $line (@lines) {
2055	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2056	    my $author = $1;
2057	    $author = deduplicate_email($author);
2058	    push(@authors, $author);
2059	}
2060	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2061	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2062    }
2063
2064    for (my $i = 0; $i < @authors; $i++) {
2065	my $exists = 0;
2066	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
2067	    if (@{$ref}[0] eq $commits[$i] &&
2068		@{$ref}[1] eq $subjects[$i]) {
2069		$exists = 1;
2070		last;
2071	    }
2072	}
2073	if (!$exists) {
2074	    push(@{$commit_author_hash{$authors[$i]}},
2075		 [ ($commits[$i], $subjects[$i]) ]);
2076	}
2077    }
2078}
2079
2080sub save_commits_by_signer {
2081    my (@lines) = @_;
2082
2083    my $commit = "";
2084    my $subject = "";
2085
2086    foreach my $line (@lines) {
2087	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
2088	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
2089	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
2090	    my @signatures = ($line);
2091	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
2092	    my @types = @$types_ref;
2093	    my @signers = @$signers_ref;
2094
2095	    my $type = $types[0];
2096	    my $signer = $signers[0];
2097
2098	    $signer = deduplicate_email($signer);
2099
2100	    my $exists = 0;
2101	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
2102		if (@{$ref}[0] eq $commit &&
2103		    @{$ref}[1] eq $subject &&
2104		    @{$ref}[2] eq $type) {
2105		    $exists = 1;
2106		    last;
2107		}
2108	    }
2109	    if (!$exists) {
2110		push(@{$commit_signer_hash{$signer}},
2111		     [ ($commit, $subject, $type) ]);
2112	    }
2113	}
2114    }
2115}
2116
2117sub vcs_assign {
2118    my ($role, $divisor, @lines) = @_;
2119
2120    my %hash;
2121    my $count = 0;
2122
2123    return if (@lines <= 0);
2124
2125    if ($divisor <= 0) {
2126	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
2127	$divisor = 1;
2128    }
2129
2130    @lines = mailmap(@lines);
2131
2132    return if (@lines <= 0);
2133
2134    @lines = sort(@lines);
2135
2136    # uniq -c
2137    $hash{$_}++ for @lines;
2138
2139    # sort -rn
2140    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
2141	my $sign_offs = $hash{$line};
2142	my $percent = $sign_offs * 100 / $divisor;
2143
2144	$percent = 100 if ($percent > 100);
2145	next if (ignore_email_address($line));
2146	$count++;
2147	last if ($sign_offs < $email_git_min_signatures ||
2148		 $count > $email_git_max_maintainers ||
2149		 $percent < $email_git_min_percent);
2150	push_email_address($line, '');
2151	if ($output_rolestats) {
2152	    my $fmt_percent = sprintf("%.0f", $percent);
2153	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
2154	} else {
2155	    add_role($line, $role);
2156	}
2157    }
2158}
2159
2160sub vcs_file_signoffs {
2161    my ($file) = @_;
2162
2163    my $authors_ref;
2164    my $signers_ref;
2165    my $stats_ref;
2166    my @authors = ();
2167    my @signers = ();
2168    my @stats = ();
2169    my $commits;
2170
2171    $vcs_used = vcs_exists();
2172    return if (!$vcs_used);
2173
2174    my $cmd = $VCS_cmds{"find_signers_cmd"};
2175    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2176
2177    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2178
2179    @signers = @{$signers_ref} if defined $signers_ref;
2180    @authors = @{$authors_ref} if defined $authors_ref;
2181    @stats = @{$stats_ref} if defined $stats_ref;
2182
2183#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
2184
2185    foreach my $signer (@signers) {
2186	$signer = deduplicate_email($signer);
2187    }
2188
2189    vcs_assign("commit_signer", $commits, @signers);
2190    vcs_assign("authored", $commits, @authors);
2191    if ($#authors == $#stats) {
2192	my $stat_pattern = $VCS_cmds{"stat_pattern"};
2193	$stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern
2194
2195	my $added = 0;
2196	my $deleted = 0;
2197	for (my $i = 0; $i <= $#stats; $i++) {
2198	    if ($stats[$i] =~ /$stat_pattern/) {
2199		$added += $1;
2200		$deleted += $2;
2201	    }
2202	}
2203	my @tmp_authors = uniq(@authors);
2204	foreach my $author (@tmp_authors) {
2205	    $author = deduplicate_email($author);
2206	}
2207	@tmp_authors = uniq(@tmp_authors);
2208	my @list_added = ();
2209	my @list_deleted = ();
2210	foreach my $author (@tmp_authors) {
2211	    my $auth_added = 0;
2212	    my $auth_deleted = 0;
2213	    for (my $i = 0; $i <= $#stats; $i++) {
2214		if ($author eq deduplicate_email($authors[$i]) &&
2215		    $stats[$i] =~ /$stat_pattern/) {
2216		    $auth_added += $1;
2217		    $auth_deleted += $2;
2218		}
2219	    }
2220	    for (my $i = 0; $i < $auth_added; $i++) {
2221		push(@list_added, $author);
2222	    }
2223	    for (my $i = 0; $i < $auth_deleted; $i++) {
2224		push(@list_deleted, $author);
2225	    }
2226	}
2227	vcs_assign("added_lines", $added, @list_added);
2228	vcs_assign("removed_lines", $deleted, @list_deleted);
2229    }
2230}
2231
2232sub vcs_file_blame {
2233    my ($file) = @_;
2234
2235    my @signers = ();
2236    my @all_commits = ();
2237    my @commits = ();
2238    my $total_commits;
2239    my $total_lines;
2240
2241    $vcs_used = vcs_exists();
2242    return if (!$vcs_used);
2243
2244    @all_commits = vcs_blame($file);
2245    @commits = uniq(@all_commits);
2246    $total_commits = @commits;
2247    $total_lines = @all_commits;
2248
2249    if ($email_git_blame_signatures) {
2250	if (vcs_is_hg()) {
2251	    my $commit_count;
2252	    my $commit_authors_ref;
2253	    my $commit_signers_ref;
2254	    my $stats_ref;
2255	    my @commit_authors = ();
2256	    my @commit_signers = ();
2257	    my $commit = join(" -r ", @commits);
2258	    my $cmd;
2259
2260	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
2261	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2262
2263	    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2264	    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2265	    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2266
2267	    push(@signers, @commit_signers);
2268	} else {
2269	    foreach my $commit (@commits) {
2270		my $commit_count;
2271		my $commit_authors_ref;
2272		my $commit_signers_ref;
2273		my $stats_ref;
2274		my @commit_authors = ();
2275		my @commit_signers = ();
2276		my $cmd;
2277
2278		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
2279		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2280
2281		($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2282		@commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2283		@commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2284
2285		push(@signers, @commit_signers);
2286	    }
2287	}
2288    }
2289
2290    if ($from_filename) {
2291	if ($output_rolestats) {
2292	    my @blame_signers;
2293	    if (vcs_is_hg()) {{		# Double brace for last exit
2294		my $commit_count;
2295		my @commit_signers = ();
2296		@commits = uniq(@commits);
2297		@commits = sort(@commits);
2298		my $commit = join(" -r ", @commits);
2299		my $cmd;
2300
2301		$cmd = $VCS_cmds{"find_commit_author_cmd"};
2302		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2303
2304		my @lines = ();
2305
2306		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
2307
2308		if (!$email_git_penguin_chiefs) {
2309		    @lines = grep(!/${penguin_chiefs}/i, @lines);
2310		}
2311
2312		last if !@lines;
2313
2314		my @authors = ();
2315		foreach my $line (@lines) {
2316		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2317			my $author = $1;
2318			$author = deduplicate_email($author);
2319			push(@authors, $author);
2320		    }
2321		}
2322
2323		save_commits_by_author(@lines) if ($interactive);
2324		save_commits_by_signer(@lines) if ($interactive);
2325
2326		push(@signers, @authors);
2327	    }}
2328	    else {
2329		foreach my $commit (@commits) {
2330		    my $i;
2331		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2332		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2333		    my @author = vcs_find_author($cmd);
2334		    next if !@author;
2335
2336		    my $formatted_author = deduplicate_email($author[0]);
2337
2338		    my $count = grep(/$commit/, @all_commits);
2339		    for ($i = 0; $i < $count ; $i++) {
2340			push(@blame_signers, $formatted_author);
2341		    }
2342		}
2343	    }
2344	    if (@blame_signers) {
2345		vcs_assign("authored lines", $total_lines, @blame_signers);
2346	    }
2347	}
2348	foreach my $signer (@signers) {
2349	    $signer = deduplicate_email($signer);
2350	}
2351	vcs_assign("commits", $total_commits, @signers);
2352    } else {
2353	foreach my $signer (@signers) {
2354	    $signer = deduplicate_email($signer);
2355	}
2356	vcs_assign("modified commits", $total_commits, @signers);
2357    }
2358}
2359
2360sub vcs_file_exists {
2361    my ($file) = @_;
2362
2363    my $exists;
2364
2365    my $vcs_used = vcs_exists();
2366    return 0 if (!$vcs_used);
2367
2368    my $cmd = $VCS_cmds{"file_exists_cmd"};
2369    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2370    $cmd .= " 2>&1";
2371    $exists = &{$VCS_cmds{"execute_cmd"}}($cmd);
2372
2373    return 0 if ($? != 0);
2374
2375    return $exists;
2376}
2377
2378sub vcs_list_files {
2379    my ($file) = @_;
2380
2381    my @lsfiles = ();
2382
2383    my $vcs_used = vcs_exists();
2384    return 0 if (!$vcs_used);
2385
2386    my $cmd = $VCS_cmds{"list_files_cmd"};
2387    $cmd =~ s/(\$\w+)/$1/eeg;   # interpolate $cmd
2388    @lsfiles = &{$VCS_cmds{"execute_cmd"}}($cmd);
2389
2390    return () if ($? != 0);
2391
2392    return @lsfiles;
2393}
2394
2395sub uniq {
2396    my (@parms) = @_;
2397
2398    my %saw;
2399    @parms = grep(!$saw{$_}++, @parms);
2400    return @parms;
2401}
2402
2403sub sort_and_uniq {
2404    my (@parms) = @_;
2405
2406    my %saw;
2407    @parms = sort @parms;
2408    @parms = grep(!$saw{$_}++, @parms);
2409    return @parms;
2410}
2411
2412sub clean_file_emails {
2413    my (@file_emails) = @_;
2414    my @fmt_emails = ();
2415
2416    foreach my $email (@file_emails) {
2417	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2418	my ($name, $address) = parse_email($email);
2419	if ($name eq '"[,\.]"') {
2420	    $name = "";
2421	}
2422
2423	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
2424	if (@nw > 2) {
2425	    my $first = $nw[@nw - 3];
2426	    my $middle = $nw[@nw - 2];
2427	    my $last = $nw[@nw - 1];
2428
2429	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
2430		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2431		(length($middle) == 1 ||
2432		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2433		$name = "$first $middle $last";
2434	    } else {
2435		$name = "$middle $last";
2436	    }
2437	}
2438
2439	if (substr($name, -1) =~ /[,\.]/) {
2440	    $name = substr($name, 0, length($name) - 1);
2441	} elsif (substr($name, -2) =~ /[,\.]"/) {
2442	    $name = substr($name, 0, length($name) - 2) . '"';
2443	}
2444
2445	if (substr($name, 0, 1) =~ /[,\.]/) {
2446	    $name = substr($name, 1, length($name) - 1);
2447	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
2448	    $name = '"' . substr($name, 2, length($name) - 2);
2449	}
2450
2451	my $fmt_email = format_email($name, $address, $email_usename);
2452	push(@fmt_emails, $fmt_email);
2453    }
2454    return @fmt_emails;
2455}
2456
2457sub merge_email {
2458    my @lines;
2459    my %saw;
2460
2461    for (@_) {
2462	my ($address, $role) = @$_;
2463	if (!$saw{$address}) {
2464	    if ($output_roles) {
2465		push(@lines, "$address ($role)");
2466	    } else {
2467		push(@lines, $address);
2468	    }
2469	    $saw{$address} = 1;
2470	}
2471    }
2472
2473    return @lines;
2474}
2475
2476sub output {
2477    my (@parms) = @_;
2478
2479    if ($output_multiline) {
2480	foreach my $line (@parms) {
2481	    print("${line}\n");
2482	}
2483    } else {
2484	print(join($output_separator, @parms));
2485	print("\n");
2486    }
2487}
2488
2489my $rfc822re;
2490
2491sub make_rfc822re {
2492#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2493#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2494#   This regexp will only work on addresses which have had comments stripped
2495#   and replaced with rfc822_lwsp.
2496
2497    my $specials = '()<>@,;:\\\\".\\[\\]';
2498    my $controls = '\\000-\\037\\177';
2499
2500    my $dtext = "[^\\[\\]\\r\\\\]";
2501    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2502
2503    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2504
2505#   Use zero-width assertion to spot the limit of an atom.  A simple
2506#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2507    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2508    my $word = "(?:$atom|$quoted_string)";
2509    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2510
2511    my $sub_domain = "(?:$atom|$domain_literal)";
2512    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2513
2514    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2515
2516    my $phrase = "$word*";
2517    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2518    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2519    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2520
2521    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2522    my $address = "(?:$mailbox|$group)";
2523
2524    return "$rfc822_lwsp*$address";
2525}
2526
2527sub rfc822_strip_comments {
2528    my $s = shift;
2529#   Recursively remove comments, and replace with a single space.  The simpler
2530#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2531#   chars in atoms, for example.
2532
2533    while ($s =~ s/^((?:[^"\\]|\\.)*
2534                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2535                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2536    return $s;
2537}
2538
2539#   valid: returns true if the parameter is an RFC822 valid address
2540#
2541sub rfc822_valid {
2542    my $s = rfc822_strip_comments(shift);
2543
2544    if (!$rfc822re) {
2545        $rfc822re = make_rfc822re();
2546    }
2547
2548    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2549}
2550
2551#   validlist: In scalar context, returns true if the parameter is an RFC822
2552#              valid list of addresses.
2553#
2554#              In list context, returns an empty list on failure (an invalid
2555#              address was found); otherwise a list whose first element is the
2556#              number of addresses found and whose remaining elements are the
2557#              addresses.  This is needed to disambiguate failure (invalid)
2558#              from success with no addresses found, because an empty string is
2559#              a valid list.
2560
2561sub rfc822_validlist {
2562    my $s = rfc822_strip_comments(shift);
2563
2564    if (!$rfc822re) {
2565        $rfc822re = make_rfc822re();
2566    }
2567    # * null list items are valid according to the RFC
2568    # * the '1' business is to aid in distinguishing failure from no results
2569
2570    my @r;
2571    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2572	$s =~ m/^$rfc822_char*$/) {
2573        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2574            push(@r, $1);
2575        }
2576        return wantarray ? (scalar(@r), @r) : 1;
2577    }
2578    return wantarray ? () : 0;
2579}
2580