xref: /openbmc/linux/scripts/get_maintainer.pl (revision e983940270f10fe8551baf0098be76ea478294a3)
1#!/usr/bin/perl -w
2# (c) 2007, Joe Perches <joe@perches.com>
3#           created from checkpatch.pl
4#
5# Print selected MAINTAINERS information for
6# the files modified in a patch or for a file
7#
8# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
9#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
10#
11# Licensed under the terms of the GNU GPL License version 2
12
13use strict;
14
15my $P = $0;
16my $V = '0.26';
17
18use Getopt::Long qw(:config no_auto_abbrev);
19use Cwd;
20
21my $cur_path = fastgetcwd() . '/';
22my $lk_path = "./";
23my $email = 1;
24my $email_usename = 1;
25my $email_maintainer = 1;
26my $email_reviewer = 1;
27my $email_list = 1;
28my $email_subscriber_list = 0;
29my $email_git_penguin_chiefs = 0;
30my $email_git = 0;
31my $email_git_all_signature_types = 0;
32my $email_git_blame = 0;
33my $email_git_blame_signatures = 1;
34my $email_git_fallback = 1;
35my $email_git_min_signatures = 1;
36my $email_git_max_maintainers = 5;
37my $email_git_min_percent = 5;
38my $email_git_since = "1-year-ago";
39my $email_hg_since = "-365";
40my $interactive = 0;
41my $email_remove_duplicates = 1;
42my $email_use_mailmap = 1;
43my $output_multiline = 1;
44my $output_separator = ", ";
45my $output_roles = 0;
46my $output_rolestats = 1;
47my $output_section_maxlen = 50;
48my $scm = 0;
49my $web = 0;
50my $subsystem = 0;
51my $status = 0;
52my $keywords = 1;
53my $sections = 0;
54my $file_emails = 0;
55my $from_filename = 0;
56my $pattern_depth = 0;
57my $version = 0;
58my $help = 0;
59
60my $vcs_used = 0;
61
62my $exit = 0;
63
64my %commit_author_hash;
65my %commit_signer_hash;
66
67my @penguin_chief = ();
68push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
69#Andrew wants in on most everything - 2009/01/14
70#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
71
72my @penguin_chief_names = ();
73foreach my $chief (@penguin_chief) {
74    if ($chief =~ m/^(.*):(.*)/) {
75	my $chief_name = $1;
76	my $chief_addr = $2;
77	push(@penguin_chief_names, $chief_name);
78    }
79}
80my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
81
82# Signature types of people who are either
83# 	a) responsible for the code in question, or
84# 	b) familiar enough with it to give relevant feedback
85my @signature_tags = ();
86push(@signature_tags, "Signed-off-by:");
87push(@signature_tags, "Reviewed-by:");
88push(@signature_tags, "Acked-by:");
89
90my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
91
92# rfc822 email address - preloaded methods go here.
93my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
94my $rfc822_char = '[\\000-\\377]';
95
96# VCS command support: class-like functions and strings
97
98my %VCS_cmds;
99
100my %VCS_cmds_git = (
101    "execute_cmd" => \&git_execute_cmd,
102    "available" => '(which("git") ne "") && (-e ".git")',
103    "find_signers_cmd" =>
104	"git log --no-color --follow --since=\$email_git_since " .
105	    '--numstat --no-merges ' .
106	    '--format="GitCommit: %H%n' .
107		      'GitAuthor: %an <%ae>%n' .
108		      'GitDate: %aD%n' .
109		      'GitSubject: %s%n' .
110		      '%b%n"' .
111	    " -- \$file",
112    "find_commit_signers_cmd" =>
113	"git log --no-color " .
114	    '--numstat ' .
115	    '--format="GitCommit: %H%n' .
116		      'GitAuthor: %an <%ae>%n' .
117		      'GitDate: %aD%n' .
118		      'GitSubject: %s%n' .
119		      '%b%n"' .
120	    " -1 \$commit",
121    "find_commit_author_cmd" =>
122	"git log --no-color " .
123	    '--numstat ' .
124	    '--format="GitCommit: %H%n' .
125		      'GitAuthor: %an <%ae>%n' .
126		      'GitDate: %aD%n' .
127		      'GitSubject: %s%n"' .
128	    " -1 \$commit",
129    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
130    "blame_file_cmd" => "git blame -l \$file",
131    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
132    "blame_commit_pattern" => "^([0-9a-f]+) ",
133    "author_pattern" => "^GitAuthor: (.*)",
134    "subject_pattern" => "^GitSubject: (.*)",
135    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
136    "file_exists_cmd" => "git ls-files \$file",
137);
138
139my %VCS_cmds_hg = (
140    "execute_cmd" => \&hg_execute_cmd,
141    "available" => '(which("hg") ne "") && (-d ".hg")',
142    "find_signers_cmd" =>
143	"hg log --date=\$email_hg_since " .
144	    "--template='HgCommit: {node}\\n" .
145	                "HgAuthor: {author}\\n" .
146			"HgSubject: {desc}\\n'" .
147	    " -- \$file",
148    "find_commit_signers_cmd" =>
149	"hg log " .
150	    "--template='HgSubject: {desc}\\n'" .
151	    " -r \$commit",
152    "find_commit_author_cmd" =>
153	"hg log " .
154	    "--template='HgCommit: {node}\\n" .
155		        "HgAuthor: {author}\\n" .
156			"HgSubject: {desc|firstline}\\n'" .
157	    " -r \$commit",
158    "blame_range_cmd" => "",		# not supported
159    "blame_file_cmd" => "hg blame -n \$file",
160    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
161    "blame_commit_pattern" => "^([ 0-9a-f]+):",
162    "author_pattern" => "^HgAuthor: (.*)",
163    "subject_pattern" => "^HgSubject: (.*)",
164    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
165    "file_exists_cmd" => "hg files \$file",
166);
167
168my $conf = which_conf(".get_maintainer.conf");
169if (-f $conf) {
170    my @conf_args;
171    open(my $conffile, '<', "$conf")
172	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
173
174    while (<$conffile>) {
175	my $line = $_;
176
177	$line =~ s/\s*\n?$//g;
178	$line =~ s/^\s*//g;
179	$line =~ s/\s+/ /g;
180
181	next if ($line =~ m/^\s*#/);
182	next if ($line =~ m/^\s*$/);
183
184	my @words = split(" ", $line);
185	foreach my $word (@words) {
186	    last if ($word =~ m/^#/);
187	    push (@conf_args, $word);
188	}
189    }
190    close($conffile);
191    unshift(@ARGV, @conf_args) if @conf_args;
192}
193
194my @ignore_emails = ();
195my $ignore_file = which_conf(".get_maintainer.ignore");
196if (-f $ignore_file) {
197    open(my $ignore, '<', "$ignore_file")
198	or warn "$P: Can't find a readable .get_maintainer.ignore file $!\n";
199    while (<$ignore>) {
200	my $line = $_;
201
202	$line =~ s/\s*\n?$//;
203	$line =~ s/^\s*//;
204	$line =~ s/\s+$//;
205	$line =~ s/#.*$//;
206
207	next if ($line =~ m/^\s*$/);
208	if (rfc822_valid($line)) {
209	    push(@ignore_emails, $line);
210	}
211    }
212    close($ignore);
213}
214
215if (!GetOptions(
216		'email!' => \$email,
217		'git!' => \$email_git,
218		'git-all-signature-types!' => \$email_git_all_signature_types,
219		'git-blame!' => \$email_git_blame,
220		'git-blame-signatures!' => \$email_git_blame_signatures,
221		'git-fallback!' => \$email_git_fallback,
222		'git-chief-penguins!' => \$email_git_penguin_chiefs,
223		'git-min-signatures=i' => \$email_git_min_signatures,
224		'git-max-maintainers=i' => \$email_git_max_maintainers,
225		'git-min-percent=i' => \$email_git_min_percent,
226		'git-since=s' => \$email_git_since,
227		'hg-since=s' => \$email_hg_since,
228		'i|interactive!' => \$interactive,
229		'remove-duplicates!' => \$email_remove_duplicates,
230		'mailmap!' => \$email_use_mailmap,
231		'm!' => \$email_maintainer,
232		'r!' => \$email_reviewer,
233		'n!' => \$email_usename,
234		'l!' => \$email_list,
235		's!' => \$email_subscriber_list,
236		'multiline!' => \$output_multiline,
237		'roles!' => \$output_roles,
238		'rolestats!' => \$output_rolestats,
239		'separator=s' => \$output_separator,
240		'subsystem!' => \$subsystem,
241		'status!' => \$status,
242		'scm!' => \$scm,
243		'web!' => \$web,
244		'pattern-depth=i' => \$pattern_depth,
245		'k|keywords!' => \$keywords,
246		'sections!' => \$sections,
247		'fe|file-emails!' => \$file_emails,
248		'f|file' => \$from_filename,
249		'v|version' => \$version,
250		'h|help|usage' => \$help,
251		)) {
252    die "$P: invalid argument - use --help if necessary\n";
253}
254
255if ($help != 0) {
256    usage();
257    exit 0;
258}
259
260if ($version != 0) {
261    print("${P} ${V}\n");
262    exit 0;
263}
264
265if (-t STDIN && !@ARGV) {
266    # We're talking to a terminal, but have no command line arguments.
267    die "$P: missing patchfile or -f file - use --help if necessary\n";
268}
269
270$output_multiline = 0 if ($output_separator ne ", ");
271$output_rolestats = 1 if ($interactive);
272$output_roles = 1 if ($output_rolestats);
273
274if ($sections) {
275    $email = 0;
276    $email_list = 0;
277    $scm = 0;
278    $status = 0;
279    $subsystem = 0;
280    $web = 0;
281    $keywords = 0;
282    $interactive = 0;
283} else {
284    my $selections = $email + $scm + $status + $subsystem + $web;
285    if ($selections == 0) {
286	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
287    }
288}
289
290if ($email &&
291    ($email_maintainer + $email_reviewer +
292     $email_list + $email_subscriber_list +
293     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
294    die "$P: Please select at least 1 email option\n";
295}
296
297if (!top_of_kernel_tree($lk_path)) {
298    die "$P: The current directory does not appear to be "
299	. "a linux kernel source tree.\n";
300}
301
302## Read MAINTAINERS for type/value pairs
303
304my @typevalue = ();
305my %keyword_hash;
306
307open (my $maint, '<', "${lk_path}MAINTAINERS")
308    or die "$P: Can't open MAINTAINERS: $!\n";
309while (<$maint>) {
310    my $line = $_;
311
312    if ($line =~ m/^([A-Z]):\s*(.*)/) {
313	my $type = $1;
314	my $value = $2;
315
316	##Filename pattern matching
317	if ($type eq "F" || $type eq "X") {
318	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
319	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
320	    $value =~ s/\?/\./g;         ##Convert ? to .
321	    ##if pattern is a directory and it lacks a trailing slash, add one
322	    if ((-d $value)) {
323		$value =~ s@([^/])$@$1/@;
324	    }
325	} elsif ($type eq "K") {
326	    $keyword_hash{@typevalue} = $value;
327	}
328	push(@typevalue, "$type:$value");
329    } elsif (!/^(\s)*$/) {
330	$line =~ s/\n$//g;
331	push(@typevalue, $line);
332    }
333}
334close($maint);
335
336
337#
338# Read mail address map
339#
340
341my $mailmap;
342
343read_mailmap();
344
345sub read_mailmap {
346    $mailmap = {
347	names => {},
348	addresses => {}
349    };
350
351    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
352
353    open(my $mailmap_file, '<', "${lk_path}.mailmap")
354	or warn "$P: Can't open .mailmap: $!\n";
355
356    while (<$mailmap_file>) {
357	s/#.*$//; #strip comments
358	s/^\s+|\s+$//g; #trim
359
360	next if (/^\s*$/); #skip empty lines
361	#entries have one of the following formats:
362	# name1 <mail1>
363	# <mail1> <mail2>
364	# name1 <mail1> <mail2>
365	# name1 <mail1> name2 <mail2>
366	# (see man git-shortlog)
367
368	if (/^([^<]+)<([^>]+)>$/) {
369	    my $real_name = $1;
370	    my $address = $2;
371
372	    $real_name =~ s/\s+$//;
373	    ($real_name, $address) = parse_email("$real_name <$address>");
374	    $mailmap->{names}->{$address} = $real_name;
375
376	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
377	    my $real_address = $1;
378	    my $wrong_address = $2;
379
380	    $mailmap->{addresses}->{$wrong_address} = $real_address;
381
382	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
383	    my $real_name = $1;
384	    my $real_address = $2;
385	    my $wrong_address = $3;
386
387	    $real_name =~ s/\s+$//;
388	    ($real_name, $real_address) =
389		parse_email("$real_name <$real_address>");
390	    $mailmap->{names}->{$wrong_address} = $real_name;
391	    $mailmap->{addresses}->{$wrong_address} = $real_address;
392
393	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
394	    my $real_name = $1;
395	    my $real_address = $2;
396	    my $wrong_name = $3;
397	    my $wrong_address = $4;
398
399	    $real_name =~ s/\s+$//;
400	    ($real_name, $real_address) =
401		parse_email("$real_name <$real_address>");
402
403	    $wrong_name =~ s/\s+$//;
404	    ($wrong_name, $wrong_address) =
405		parse_email("$wrong_name <$wrong_address>");
406
407	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
408	    $mailmap->{names}->{$wrong_email} = $real_name;
409	    $mailmap->{addresses}->{$wrong_email} = $real_address;
410	}
411    }
412    close($mailmap_file);
413}
414
415## use the filenames on the command line or find the filenames in the patchfiles
416
417my @files = ();
418my @range = ();
419my @keyword_tvi = ();
420my @file_emails = ();
421
422if (!@ARGV) {
423    push(@ARGV, "&STDIN");
424}
425
426foreach my $file (@ARGV) {
427    if ($file ne "&STDIN") {
428	##if $file is a directory and it lacks a trailing slash, add one
429	if ((-d $file)) {
430	    $file =~ s@([^/])$@$1/@;
431	} elsif (!(-f $file)) {
432	    die "$P: file '${file}' not found\n";
433	}
434    }
435    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
436	$file =~ s/^\Q${cur_path}\E//;	#strip any absolute path
437	$file =~ s/^\Q${lk_path}\E//;	#or the path to the lk tree
438	push(@files, $file);
439	if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
440	    open(my $f, '<', $file)
441		or die "$P: Can't open $file: $!\n";
442	    my $text = do { local($/) ; <$f> };
443	    close($f);
444	    if ($keywords) {
445		foreach my $line (keys %keyword_hash) {
446		    if ($text =~ m/$keyword_hash{$line}/x) {
447			push(@keyword_tvi, $line);
448		    }
449		}
450	    }
451	    if ($file_emails) {
452		my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
453		push(@file_emails, clean_file_emails(@poss_addr));
454	    }
455	}
456    } else {
457	my $file_cnt = @files;
458	my $lastfile;
459
460	open(my $patch, "< $file")
461	    or die "$P: Can't open $file: $!\n";
462
463	# We can check arbitrary information before the patch
464	# like the commit message, mail headers, etc...
465	# This allows us to match arbitrary keywords against any part
466	# of a git format-patch generated file (subject tags, etc...)
467
468	my $patch_prefix = "";			#Parsing the intro
469
470	while (<$patch>) {
471	    my $patch_line = $_;
472	    if (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
473		my $filename = $1;
474		$filename =~ s@^[^/]*/@@;
475		$filename =~ s@\n@@;
476		$lastfile = $filename;
477		push(@files, $filename);
478		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
479	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
480		if ($email_git_blame) {
481		    push(@range, "$lastfile:$1:$2");
482		}
483	    } elsif ($keywords) {
484		foreach my $line (keys %keyword_hash) {
485		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
486			push(@keyword_tvi, $line);
487		    }
488		}
489	    }
490	}
491	close($patch);
492
493	if ($file_cnt == @files) {
494	    warn "$P: file '${file}' doesn't appear to be a patch.  "
495		. "Add -f to options?\n";
496	}
497	@files = sort_and_uniq(@files);
498    }
499}
500
501@file_emails = uniq(@file_emails);
502
503my %email_hash_name;
504my %email_hash_address;
505my @email_to = ();
506my %hash_list_to;
507my @list_to = ();
508my @scm = ();
509my @web = ();
510my @subsystem = ();
511my @status = ();
512my %deduplicate_name_hash = ();
513my %deduplicate_address_hash = ();
514
515my @maintainers = get_maintainers();
516
517if (@maintainers) {
518    @maintainers = merge_email(@maintainers);
519    output(@maintainers);
520}
521
522if ($scm) {
523    @scm = uniq(@scm);
524    output(@scm);
525}
526
527if ($status) {
528    @status = uniq(@status);
529    output(@status);
530}
531
532if ($subsystem) {
533    @subsystem = uniq(@subsystem);
534    output(@subsystem);
535}
536
537if ($web) {
538    @web = uniq(@web);
539    output(@web);
540}
541
542exit($exit);
543
544sub ignore_email_address {
545    my ($address) = @_;
546
547    foreach my $ignore (@ignore_emails) {
548	return 1 if ($ignore eq $address);
549    }
550
551    return 0;
552}
553
554sub range_is_maintained {
555    my ($start, $end) = @_;
556
557    for (my $i = $start; $i < $end; $i++) {
558	my $line = $typevalue[$i];
559	if ($line =~ m/^([A-Z]):\s*(.*)/) {
560	    my $type = $1;
561	    my $value = $2;
562	    if ($type eq 'S') {
563		if ($value =~ /(maintain|support)/i) {
564		    return 1;
565		}
566	    }
567	}
568    }
569    return 0;
570}
571
572sub range_has_maintainer {
573    my ($start, $end) = @_;
574
575    for (my $i = $start; $i < $end; $i++) {
576	my $line = $typevalue[$i];
577	if ($line =~ m/^([A-Z]):\s*(.*)/) {
578	    my $type = $1;
579	    my $value = $2;
580	    if ($type eq 'M') {
581		return 1;
582	    }
583	}
584    }
585    return 0;
586}
587
588sub get_maintainers {
589    %email_hash_name = ();
590    %email_hash_address = ();
591    %commit_author_hash = ();
592    %commit_signer_hash = ();
593    @email_to = ();
594    %hash_list_to = ();
595    @list_to = ();
596    @scm = ();
597    @web = ();
598    @subsystem = ();
599    @status = ();
600    %deduplicate_name_hash = ();
601    %deduplicate_address_hash = ();
602    if ($email_git_all_signature_types) {
603	$signature_pattern = "(.+?)[Bb][Yy]:";
604    } else {
605	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
606    }
607
608    # Find responsible parties
609
610    my %exact_pattern_match_hash = ();
611
612    foreach my $file (@files) {
613
614	my %hash;
615	my $tvi = find_first_section();
616	while ($tvi < @typevalue) {
617	    my $start = find_starting_index($tvi);
618	    my $end = find_ending_index($tvi);
619	    my $exclude = 0;
620	    my $i;
621
622	    #Do not match excluded file patterns
623
624	    for ($i = $start; $i < $end; $i++) {
625		my $line = $typevalue[$i];
626		if ($line =~ m/^([A-Z]):\s*(.*)/) {
627		    my $type = $1;
628		    my $value = $2;
629		    if ($type eq 'X') {
630			if (file_match_pattern($file, $value)) {
631			    $exclude = 1;
632			    last;
633			}
634		    }
635		}
636	    }
637
638	    if (!$exclude) {
639		for ($i = $start; $i < $end; $i++) {
640		    my $line = $typevalue[$i];
641		    if ($line =~ m/^([A-Z]):\s*(.*)/) {
642			my $type = $1;
643			my $value = $2;
644			if ($type eq 'F') {
645			    if (file_match_pattern($file, $value)) {
646				my $value_pd = ($value =~ tr@/@@);
647				my $file_pd = ($file  =~ tr@/@@);
648				$value_pd++ if (substr($value,-1,1) ne "/");
649				$value_pd = -1 if ($value =~ /^\.\*/);
650				if ($value_pd >= $file_pd &&
651				    range_is_maintained($start, $end) &&
652				    range_has_maintainer($start, $end)) {
653				    $exact_pattern_match_hash{$file} = 1;
654				}
655				if ($pattern_depth == 0 ||
656				    (($file_pd - $value_pd) < $pattern_depth)) {
657				    $hash{$tvi} = $value_pd;
658				}
659			    }
660			} elsif ($type eq 'N') {
661			    if ($file =~ m/$value/x) {
662				$hash{$tvi} = 0;
663			    }
664			}
665		    }
666		}
667	    }
668	    $tvi = $end + 1;
669	}
670
671	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
672	    add_categories($line);
673	    if ($sections) {
674		my $i;
675		my $start = find_starting_index($line);
676		my $end = find_ending_index($line);
677		for ($i = $start; $i < $end; $i++) {
678		    my $line = $typevalue[$i];
679		    if ($line =~ /^[FX]:/) {		##Restore file patterns
680			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
681			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
682			$line =~ s/\\\./\./g;       	##Convert \. to .
683			$line =~ s/\.\*/\*/g;       	##Convert .* to *
684		    }
685		    $line =~ s/^([A-Z]):/$1:\t/g;
686		    print("$line\n");
687		}
688		print("\n");
689	    }
690	}
691    }
692
693    if ($keywords) {
694	@keyword_tvi = sort_and_uniq(@keyword_tvi);
695	foreach my $line (@keyword_tvi) {
696	    add_categories($line);
697	}
698    }
699
700    foreach my $email (@email_to, @list_to) {
701	$email->[0] = deduplicate_email($email->[0]);
702    }
703
704    foreach my $file (@files) {
705	if ($email &&
706	    ($email_git || ($email_git_fallback &&
707			    !$exact_pattern_match_hash{$file}))) {
708	    vcs_file_signoffs($file);
709	}
710	if ($email && $email_git_blame) {
711	    vcs_file_blame($file);
712	}
713    }
714
715    if ($email) {
716	foreach my $chief (@penguin_chief) {
717	    if ($chief =~ m/^(.*):(.*)/) {
718		my $email_address;
719
720		$email_address = format_email($1, $2, $email_usename);
721		if ($email_git_penguin_chiefs) {
722		    push(@email_to, [$email_address, 'chief penguin']);
723		} else {
724		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
725		}
726	    }
727	}
728
729	foreach my $email (@file_emails) {
730	    my ($name, $address) = parse_email($email);
731
732	    my $tmp_email = format_email($name, $address, $email_usename);
733	    push_email_address($tmp_email, '');
734	    add_role($tmp_email, 'in file');
735	}
736    }
737
738    my @to = ();
739    if ($email || $email_list) {
740	if ($email) {
741	    @to = (@to, @email_to);
742	}
743	if ($email_list) {
744	    @to = (@to, @list_to);
745	}
746    }
747
748    if ($interactive) {
749	@to = interactive_get_maintainers(\@to);
750    }
751
752    return @to;
753}
754
755sub file_match_pattern {
756    my ($file, $pattern) = @_;
757    if (substr($pattern, -1) eq "/") {
758	if ($file =~ m@^$pattern@) {
759	    return 1;
760	}
761    } else {
762	if ($file =~ m@^$pattern@) {
763	    my $s1 = ($file =~ tr@/@@);
764	    my $s2 = ($pattern =~ tr@/@@);
765	    if ($s1 == $s2) {
766		return 1;
767	    }
768	}
769    }
770    return 0;
771}
772
773sub usage {
774    print <<EOT;
775usage: $P [options] patchfile
776       $P [options] -f file|directory
777version: $V
778
779MAINTAINER field selection options:
780  --email => print email address(es) if any
781    --git => include recent git \*-by: signers
782    --git-all-signature-types => include signers regardless of signature type
783        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
784    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
785    --git-chief-penguins => include ${penguin_chiefs}
786    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
787    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
788    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
789    --git-blame => use git blame to find modified commits for patch or file
790    --git-blame-signatures => when used with --git-blame, also include all commit signers
791    --git-since => git history to use (default: $email_git_since)
792    --hg-since => hg history to use (default: $email_hg_since)
793    --interactive => display a menu (mostly useful if used with the --git option)
794    --m => include maintainer(s) if any
795    --r => include reviewer(s) if any
796    --n => include name 'Full Name <addr\@domain.tld>'
797    --l => include list(s) if any
798    --s => include subscriber only list(s) if any
799    --remove-duplicates => minimize duplicate email names/addresses
800    --roles => show roles (status:subsystem, git-signer, list, etc...)
801    --rolestats => show roles and statistics (commits/total_commits, %)
802    --file-emails => add email addresses found in -f file (default: 0 (off))
803  --scm => print SCM tree(s) if any
804  --status => print status if any
805  --subsystem => print subsystem name if any
806  --web => print website(s) if any
807
808Output type options:
809  --separator [, ] => separator for multiple entries on 1 line
810    using --separator also sets --nomultiline if --separator is not [, ]
811  --multiline => print 1 entry per line
812
813Other options:
814  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
815  --keywords => scan patch for keywords (default: $keywords)
816  --sections => print all of the subsystem sections with pattern matches
817  --mailmap => use .mailmap file (default: $email_use_mailmap)
818  --version => show version
819  --help => show this help information
820
821Default options:
822  [--email --nogit --git-fallback --m --r --n --l --multiline --pattern-depth=0
823   --remove-duplicates --rolestats]
824
825Notes:
826  Using "-f directory" may give unexpected results:
827      Used with "--git", git signators for _all_ files in and below
828          directory are examined as git recurses directories.
829          Any specified X: (exclude) pattern matches are _not_ ignored.
830      Used with "--nogit", directory is used as a pattern match,
831          no individual file within the directory or subdirectory
832          is matched.
833      Used with "--git-blame", does not iterate all files in directory
834  Using "--git-blame" is slow and may add old committers and authors
835      that are no longer active maintainers to the output.
836  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
837      other automated tools that expect only ["name"] <email address>
838      may not work because of additional output after <email address>.
839  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
840      not the percentage of the entire file authored.  # of commits is
841      not a good measure of amount of code authored.  1 major commit may
842      contain a thousand lines, 5 trivial commits may modify a single line.
843  If git is not installed, but mercurial (hg) is installed and an .hg
844      repository exists, the following options apply to mercurial:
845          --git,
846          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
847          --git-blame
848      Use --hg-since not --git-since to control date selection
849  File ".get_maintainer.conf", if it exists in the linux kernel source root
850      directory, can change whatever get_maintainer defaults are desired.
851      Entries in this file can be any command line argument.
852      This file is prepended to any additional command line arguments.
853      Multiple lines and # comments are allowed.
854  Most options have both positive and negative forms.
855      The negative forms for --<foo> are --no<foo> and --no-<foo>.
856
857EOT
858}
859
860sub top_of_kernel_tree {
861    my ($lk_path) = @_;
862
863    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
864	$lk_path .= "/";
865    }
866    if (   (-f "${lk_path}COPYING")
867	&& (-f "${lk_path}CREDITS")
868	&& (-f "${lk_path}Kbuild")
869	&& (-f "${lk_path}MAINTAINERS")
870	&& (-f "${lk_path}Makefile")
871	&& (-f "${lk_path}README")
872	&& (-d "${lk_path}Documentation")
873	&& (-d "${lk_path}arch")
874	&& (-d "${lk_path}include")
875	&& (-d "${lk_path}drivers")
876	&& (-d "${lk_path}fs")
877	&& (-d "${lk_path}init")
878	&& (-d "${lk_path}ipc")
879	&& (-d "${lk_path}kernel")
880	&& (-d "${lk_path}lib")
881	&& (-d "${lk_path}scripts")) {
882	return 1;
883    }
884    return 0;
885}
886
887sub parse_email {
888    my ($formatted_email) = @_;
889
890    my $name = "";
891    my $address = "";
892
893    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
894	$name = $1;
895	$address = $2;
896    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
897	$address = $1;
898    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
899	$address = $1;
900    }
901
902    $name =~ s/^\s+|\s+$//g;
903    $name =~ s/^\"|\"$//g;
904    $address =~ s/^\s+|\s+$//g;
905
906    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
907	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
908	$name = "\"$name\"";
909    }
910
911    return ($name, $address);
912}
913
914sub format_email {
915    my ($name, $address, $usename) = @_;
916
917    my $formatted_email;
918
919    $name =~ s/^\s+|\s+$//g;
920    $name =~ s/^\"|\"$//g;
921    $address =~ s/^\s+|\s+$//g;
922
923    if ($name =~ /[^\w \-]/i) {          ##has "must quote" chars
924	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
925	$name = "\"$name\"";
926    }
927
928    if ($usename) {
929	if ("$name" eq "") {
930	    $formatted_email = "$address";
931	} else {
932	    $formatted_email = "$name <$address>";
933	}
934    } else {
935	$formatted_email = $address;
936    }
937
938    return $formatted_email;
939}
940
941sub find_first_section {
942    my $index = 0;
943
944    while ($index < @typevalue) {
945	my $tv = $typevalue[$index];
946	if (($tv =~ m/^([A-Z]):\s*(.*)/)) {
947	    last;
948	}
949	$index++;
950    }
951
952    return $index;
953}
954
955sub find_starting_index {
956    my ($index) = @_;
957
958    while ($index > 0) {
959	my $tv = $typevalue[$index];
960	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
961	    last;
962	}
963	$index--;
964    }
965
966    return $index;
967}
968
969sub find_ending_index {
970    my ($index) = @_;
971
972    while ($index < @typevalue) {
973	my $tv = $typevalue[$index];
974	if (!($tv =~ m/^([A-Z]):\s*(.*)/)) {
975	    last;
976	}
977	$index++;
978    }
979
980    return $index;
981}
982
983sub get_subsystem_name {
984    my ($index) = @_;
985
986    my $start = find_starting_index($index);
987
988    my $subsystem = $typevalue[$start];
989    if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) {
990	$subsystem = substr($subsystem, 0, $output_section_maxlen - 3);
991	$subsystem =~ s/\s*$//;
992	$subsystem = $subsystem . "...";
993    }
994    return $subsystem;
995}
996
997sub get_maintainer_role {
998    my ($index) = @_;
999
1000    my $i;
1001    my $start = find_starting_index($index);
1002    my $end = find_ending_index($index);
1003
1004    my $role = "unknown";
1005    my $subsystem = get_subsystem_name($index);
1006
1007    for ($i = $start + 1; $i < $end; $i++) {
1008	my $tv = $typevalue[$i];
1009	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1010	    my $ptype = $1;
1011	    my $pvalue = $2;
1012	    if ($ptype eq "S") {
1013		$role = $pvalue;
1014	    }
1015	}
1016    }
1017
1018    $role = lc($role);
1019    if      ($role eq "supported") {
1020	$role = "supporter";
1021    } elsif ($role eq "maintained") {
1022	$role = "maintainer";
1023    } elsif ($role eq "odd fixes") {
1024	$role = "odd fixer";
1025    } elsif ($role eq "orphan") {
1026	$role = "orphan minder";
1027    } elsif ($role eq "obsolete") {
1028	$role = "obsolete minder";
1029    } elsif ($role eq "buried alive in reporters") {
1030	$role = "chief penguin";
1031    }
1032
1033    return $role . ":" . $subsystem;
1034}
1035
1036sub get_list_role {
1037    my ($index) = @_;
1038
1039    my $subsystem = get_subsystem_name($index);
1040
1041    if ($subsystem eq "THE REST") {
1042	$subsystem = "";
1043    }
1044
1045    return $subsystem;
1046}
1047
1048sub add_categories {
1049    my ($index) = @_;
1050
1051    my $i;
1052    my $start = find_starting_index($index);
1053    my $end = find_ending_index($index);
1054
1055    push(@subsystem, $typevalue[$start]);
1056
1057    for ($i = $start + 1; $i < $end; $i++) {
1058	my $tv = $typevalue[$i];
1059	if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1060	    my $ptype = $1;
1061	    my $pvalue = $2;
1062	    if ($ptype eq "L") {
1063		my $list_address = $pvalue;
1064		my $list_additional = "";
1065		my $list_role = get_list_role($i);
1066
1067		if ($list_role ne "") {
1068		    $list_role = ":" . $list_role;
1069		}
1070		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1071		    $list_address = $1;
1072		    $list_additional = $2;
1073		}
1074		if ($list_additional =~ m/subscribers-only/) {
1075		    if ($email_subscriber_list) {
1076			if (!$hash_list_to{lc($list_address)}) {
1077			    $hash_list_to{lc($list_address)} = 1;
1078			    push(@list_to, [$list_address,
1079					    "subscriber list${list_role}"]);
1080			}
1081		    }
1082		} else {
1083		    if ($email_list) {
1084			if (!$hash_list_to{lc($list_address)}) {
1085			    $hash_list_to{lc($list_address)} = 1;
1086			    if ($list_additional =~ m/moderated/) {
1087				push(@list_to, [$list_address,
1088						"moderated list${list_role}"]);
1089			    } else {
1090				push(@list_to, [$list_address,
1091						"open list${list_role}"]);
1092			    }
1093			}
1094		    }
1095		}
1096	    } elsif ($ptype eq "M") {
1097		my ($name, $address) = parse_email($pvalue);
1098		if ($name eq "") {
1099		    if ($i > 0) {
1100			my $tv = $typevalue[$i - 1];
1101			if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1102			    if ($1 eq "P") {
1103				$name = $2;
1104				$pvalue = format_email($name, $address, $email_usename);
1105			    }
1106			}
1107		    }
1108		}
1109		if ($email_maintainer) {
1110		    my $role = get_maintainer_role($i);
1111		    push_email_addresses($pvalue, $role);
1112		}
1113	    } elsif ($ptype eq "R") {
1114		my ($name, $address) = parse_email($pvalue);
1115		if ($name eq "") {
1116		    if ($i > 0) {
1117			my $tv = $typevalue[$i - 1];
1118			if ($tv =~ m/^([A-Z]):\s*(.*)/) {
1119			    if ($1 eq "P") {
1120				$name = $2;
1121				$pvalue = format_email($name, $address, $email_usename);
1122			    }
1123			}
1124		    }
1125		}
1126		if ($email_reviewer) {
1127		    my $subsystem = get_subsystem_name($i);
1128		    push_email_addresses($pvalue, "reviewer:$subsystem");
1129		}
1130	    } elsif ($ptype eq "T") {
1131		push(@scm, $pvalue);
1132	    } elsif ($ptype eq "W") {
1133		push(@web, $pvalue);
1134	    } elsif ($ptype eq "S") {
1135		push(@status, $pvalue);
1136	    }
1137	}
1138    }
1139}
1140
1141sub email_inuse {
1142    my ($name, $address) = @_;
1143
1144    return 1 if (($name eq "") && ($address eq ""));
1145    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1146    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1147
1148    return 0;
1149}
1150
1151sub push_email_address {
1152    my ($line, $role) = @_;
1153
1154    my ($name, $address) = parse_email($line);
1155
1156    if ($address eq "") {
1157	return 0;
1158    }
1159
1160    if (!$email_remove_duplicates) {
1161	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1162    } elsif (!email_inuse($name, $address)) {
1163	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1164	$email_hash_name{lc($name)}++ if ($name ne "");
1165	$email_hash_address{lc($address)}++;
1166    }
1167
1168    return 1;
1169}
1170
1171sub push_email_addresses {
1172    my ($address, $role) = @_;
1173
1174    my @address_list = ();
1175
1176    if (rfc822_valid($address)) {
1177	push_email_address($address, $role);
1178    } elsif (@address_list = rfc822_validlist($address)) {
1179	my $array_count = shift(@address_list);
1180	while (my $entry = shift(@address_list)) {
1181	    push_email_address($entry, $role);
1182	}
1183    } else {
1184	if (!push_email_address($address, $role)) {
1185	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1186	}
1187    }
1188}
1189
1190sub add_role {
1191    my ($line, $role) = @_;
1192
1193    my ($name, $address) = parse_email($line);
1194    my $email = format_email($name, $address, $email_usename);
1195
1196    foreach my $entry (@email_to) {
1197	if ($email_remove_duplicates) {
1198	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1199	    if (($name eq $entry_name || $address eq $entry_address)
1200		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1201	    ) {
1202		if ($entry->[1] eq "") {
1203		    $entry->[1] = "$role";
1204		} else {
1205		    $entry->[1] = "$entry->[1],$role";
1206		}
1207	    }
1208	} else {
1209	    if ($email eq $entry->[0]
1210		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1211	    ) {
1212		if ($entry->[1] eq "") {
1213		    $entry->[1] = "$role";
1214		} else {
1215		    $entry->[1] = "$entry->[1],$role";
1216		}
1217	    }
1218	}
1219    }
1220}
1221
1222sub which {
1223    my ($bin) = @_;
1224
1225    foreach my $path (split(/:/, $ENV{PATH})) {
1226	if (-e "$path/$bin") {
1227	    return "$path/$bin";
1228	}
1229    }
1230
1231    return "";
1232}
1233
1234sub which_conf {
1235    my ($conf) = @_;
1236
1237    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1238	if (-e "$path/$conf") {
1239	    return "$path/$conf";
1240	}
1241    }
1242
1243    return "";
1244}
1245
1246sub mailmap_email {
1247    my ($line) = @_;
1248
1249    my ($name, $address) = parse_email($line);
1250    my $email = format_email($name, $address, 1);
1251    my $real_name = $name;
1252    my $real_address = $address;
1253
1254    if (exists $mailmap->{names}->{$email} ||
1255	exists $mailmap->{addresses}->{$email}) {
1256	if (exists $mailmap->{names}->{$email}) {
1257	    $real_name = $mailmap->{names}->{$email};
1258	}
1259	if (exists $mailmap->{addresses}->{$email}) {
1260	    $real_address = $mailmap->{addresses}->{$email};
1261	}
1262    } else {
1263	if (exists $mailmap->{names}->{$address}) {
1264	    $real_name = $mailmap->{names}->{$address};
1265	}
1266	if (exists $mailmap->{addresses}->{$address}) {
1267	    $real_address = $mailmap->{addresses}->{$address};
1268	}
1269    }
1270    return format_email($real_name, $real_address, 1);
1271}
1272
1273sub mailmap {
1274    my (@addresses) = @_;
1275
1276    my @mapped_emails = ();
1277    foreach my $line (@addresses) {
1278	push(@mapped_emails, mailmap_email($line));
1279    }
1280    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1281    return @mapped_emails;
1282}
1283
1284sub merge_by_realname {
1285    my %address_map;
1286    my (@emails) = @_;
1287
1288    foreach my $email (@emails) {
1289	my ($name, $address) = parse_email($email);
1290	if (exists $address_map{$name}) {
1291	    $address = $address_map{$name};
1292	    $email = format_email($name, $address, 1);
1293	} else {
1294	    $address_map{$name} = $address;
1295	}
1296    }
1297}
1298
1299sub git_execute_cmd {
1300    my ($cmd) = @_;
1301    my @lines = ();
1302
1303    my $output = `$cmd`;
1304    $output =~ s/^\s*//gm;
1305    @lines = split("\n", $output);
1306
1307    return @lines;
1308}
1309
1310sub hg_execute_cmd {
1311    my ($cmd) = @_;
1312    my @lines = ();
1313
1314    my $output = `$cmd`;
1315    @lines = split("\n", $output);
1316
1317    return @lines;
1318}
1319
1320sub extract_formatted_signatures {
1321    my (@signature_lines) = @_;
1322
1323    my @type = @signature_lines;
1324
1325    s/\s*(.*):.*/$1/ for (@type);
1326
1327    # cut -f2- -d":"
1328    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1329
1330## Reformat email addresses (with names) to avoid badly written signatures
1331
1332    foreach my $signer (@signature_lines) {
1333	$signer = deduplicate_email($signer);
1334    }
1335
1336    return (\@type, \@signature_lines);
1337}
1338
1339sub vcs_find_signers {
1340    my ($cmd, $file) = @_;
1341    my $commits;
1342    my @lines = ();
1343    my @signatures = ();
1344    my @authors = ();
1345    my @stats = ();
1346
1347    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1348
1349    my $pattern = $VCS_cmds{"commit_pattern"};
1350    my $author_pattern = $VCS_cmds{"author_pattern"};
1351    my $stat_pattern = $VCS_cmds{"stat_pattern"};
1352
1353    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern
1354
1355    $commits = grep(/$pattern/, @lines);	# of commits
1356
1357    @authors = grep(/$author_pattern/, @lines);
1358    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1359    @stats = grep(/$stat_pattern/, @lines);
1360
1361#    print("stats: <@stats>\n");
1362
1363    return (0, \@signatures, \@authors, \@stats) if !@signatures;
1364
1365    save_commits_by_author(@lines) if ($interactive);
1366    save_commits_by_signer(@lines) if ($interactive);
1367
1368    if (!$email_git_penguin_chiefs) {
1369	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1370    }
1371
1372    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
1373    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1374
1375    return ($commits, $signers_ref, $authors_ref, \@stats);
1376}
1377
1378sub vcs_find_author {
1379    my ($cmd) = @_;
1380    my @lines = ();
1381
1382    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1383
1384    if (!$email_git_penguin_chiefs) {
1385	@lines = grep(!/${penguin_chiefs}/i, @lines);
1386    }
1387
1388    return @lines if !@lines;
1389
1390    my @authors = ();
1391    foreach my $line (@lines) {
1392	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1393	    my $author = $1;
1394	    my ($name, $address) = parse_email($author);
1395	    $author = format_email($name, $address, 1);
1396	    push(@authors, $author);
1397	}
1398    }
1399
1400    save_commits_by_author(@lines) if ($interactive);
1401    save_commits_by_signer(@lines) if ($interactive);
1402
1403    return @authors;
1404}
1405
1406sub vcs_save_commits {
1407    my ($cmd) = @_;
1408    my @lines = ();
1409    my @commits = ();
1410
1411    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1412
1413    foreach my $line (@lines) {
1414	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1415	    push(@commits, $1);
1416	}
1417    }
1418
1419    return @commits;
1420}
1421
1422sub vcs_blame {
1423    my ($file) = @_;
1424    my $cmd;
1425    my @commits = ();
1426
1427    return @commits if (!(-f $file));
1428
1429    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1430	my @all_commits = ();
1431
1432	$cmd = $VCS_cmds{"blame_file_cmd"};
1433	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1434	@all_commits = vcs_save_commits($cmd);
1435
1436	foreach my $file_range_diff (@range) {
1437	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1438	    my $diff_file = $1;
1439	    my $diff_start = $2;
1440	    my $diff_length = $3;
1441	    next if ("$file" ne "$diff_file");
1442	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1443		push(@commits, $all_commits[$i]);
1444	    }
1445	}
1446    } elsif (@range) {
1447	foreach my $file_range_diff (@range) {
1448	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1449	    my $diff_file = $1;
1450	    my $diff_start = $2;
1451	    my $diff_length = $3;
1452	    next if ("$file" ne "$diff_file");
1453	    $cmd = $VCS_cmds{"blame_range_cmd"};
1454	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1455	    push(@commits, vcs_save_commits($cmd));
1456	}
1457    } else {
1458	$cmd = $VCS_cmds{"blame_file_cmd"};
1459	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1460	@commits = vcs_save_commits($cmd);
1461    }
1462
1463    foreach my $commit (@commits) {
1464	$commit =~ s/^\^//g;
1465    }
1466
1467    return @commits;
1468}
1469
1470my $printed_novcs = 0;
1471sub vcs_exists {
1472    %VCS_cmds = %VCS_cmds_git;
1473    return 1 if eval $VCS_cmds{"available"};
1474    %VCS_cmds = %VCS_cmds_hg;
1475    return 2 if eval $VCS_cmds{"available"};
1476    %VCS_cmds = ();
1477    if (!$printed_novcs) {
1478	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1479	warn("Using a git repository produces better results.\n");
1480	warn("Try Linus Torvalds' latest git repository using:\n");
1481	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
1482	$printed_novcs = 1;
1483    }
1484    return 0;
1485}
1486
1487sub vcs_is_git {
1488    vcs_exists();
1489    return $vcs_used == 1;
1490}
1491
1492sub vcs_is_hg {
1493    return $vcs_used == 2;
1494}
1495
1496sub interactive_get_maintainers {
1497    my ($list_ref) = @_;
1498    my @list = @$list_ref;
1499
1500    vcs_exists();
1501
1502    my %selected;
1503    my %authored;
1504    my %signed;
1505    my $count = 0;
1506    my $maintained = 0;
1507    foreach my $entry (@list) {
1508	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1509	$selected{$count} = 1;
1510	$authored{$count} = 0;
1511	$signed{$count} = 0;
1512	$count++;
1513    }
1514
1515    #menu loop
1516    my $done = 0;
1517    my $print_options = 0;
1518    my $redraw = 1;
1519    while (!$done) {
1520	$count = 0;
1521	if ($redraw) {
1522	    printf STDERR "\n%1s %2s %-65s",
1523			  "*", "#", "email/list and role:stats";
1524	    if ($email_git ||
1525		($email_git_fallback && !$maintained) ||
1526		$email_git_blame) {
1527		print STDERR "auth sign";
1528	    }
1529	    print STDERR "\n";
1530	    foreach my $entry (@list) {
1531		my $email = $entry->[0];
1532		my $role = $entry->[1];
1533		my $sel = "";
1534		$sel = "*" if ($selected{$count});
1535		my $commit_author = $commit_author_hash{$email};
1536		my $commit_signer = $commit_signer_hash{$email};
1537		my $authored = 0;
1538		my $signed = 0;
1539		$authored++ for (@{$commit_author});
1540		$signed++ for (@{$commit_signer});
1541		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1542		printf STDERR "%4d %4d", $authored, $signed
1543		    if ($authored > 0 || $signed > 0);
1544		printf STDERR "\n     %s\n", $role;
1545		if ($authored{$count}) {
1546		    my $commit_author = $commit_author_hash{$email};
1547		    foreach my $ref (@{$commit_author}) {
1548			print STDERR "     Author: @{$ref}[1]\n";
1549		    }
1550		}
1551		if ($signed{$count}) {
1552		    my $commit_signer = $commit_signer_hash{$email};
1553		    foreach my $ref (@{$commit_signer}) {
1554			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1555		    }
1556		}
1557
1558		$count++;
1559	    }
1560	}
1561	my $date_ref = \$email_git_since;
1562	$date_ref = \$email_hg_since if (vcs_is_hg());
1563	if ($print_options) {
1564	    $print_options = 0;
1565	    if (vcs_exists()) {
1566		print STDERR <<EOT
1567
1568Version Control options:
1569g  use git history      [$email_git]
1570gf use git-fallback     [$email_git_fallback]
1571b  use git blame        [$email_git_blame]
1572bs use blame signatures [$email_git_blame_signatures]
1573c# minimum commits      [$email_git_min_signatures]
1574%# min percent          [$email_git_min_percent]
1575d# history to use       [$$date_ref]
1576x# max maintainers      [$email_git_max_maintainers]
1577t  all signature types  [$email_git_all_signature_types]
1578m  use .mailmap         [$email_use_mailmap]
1579EOT
1580	    }
1581	    print STDERR <<EOT
1582
1583Additional options:
15840  toggle all
1585tm toggle maintainers
1586tg toggle git entries
1587tl toggle open list entries
1588ts toggle subscriber list entries
1589f  emails in file       [$file_emails]
1590k  keywords in file     [$keywords]
1591r  remove duplicates    [$email_remove_duplicates]
1592p# pattern match depth  [$pattern_depth]
1593EOT
1594	}
1595	print STDERR
1596"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1597
1598	my $input = <STDIN>;
1599	chomp($input);
1600
1601	$redraw = 1;
1602	my $rerun = 0;
1603	my @wish = split(/[, ]+/, $input);
1604	foreach my $nr (@wish) {
1605	    $nr = lc($nr);
1606	    my $sel = substr($nr, 0, 1);
1607	    my $str = substr($nr, 1);
1608	    my $val = 0;
1609	    $val = $1 if $str =~ /^(\d+)$/;
1610
1611	    if ($sel eq "y") {
1612		$interactive = 0;
1613		$done = 1;
1614		$output_rolestats = 0;
1615		$output_roles = 0;
1616		last;
1617	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1618		$selected{$nr - 1} = !$selected{$nr - 1};
1619	    } elsif ($sel eq "*" || $sel eq '^') {
1620		my $toggle = 0;
1621		$toggle = 1 if ($sel eq '*');
1622		for (my $i = 0; $i < $count; $i++) {
1623		    $selected{$i} = $toggle;
1624		}
1625	    } elsif ($sel eq "0") {
1626		for (my $i = 0; $i < $count; $i++) {
1627		    $selected{$i} = !$selected{$i};
1628		}
1629	    } elsif ($sel eq "t") {
1630		if (lc($str) eq "m") {
1631		    for (my $i = 0; $i < $count; $i++) {
1632			$selected{$i} = !$selected{$i}
1633			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1634		    }
1635		} elsif (lc($str) eq "g") {
1636		    for (my $i = 0; $i < $count; $i++) {
1637			$selected{$i} = !$selected{$i}
1638			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1639		    }
1640		} elsif (lc($str) eq "l") {
1641		    for (my $i = 0; $i < $count; $i++) {
1642			$selected{$i} = !$selected{$i}
1643			    if ($list[$i]->[1] =~ /^(open list)/i);
1644		    }
1645		} elsif (lc($str) eq "s") {
1646		    for (my $i = 0; $i < $count; $i++) {
1647			$selected{$i} = !$selected{$i}
1648			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1649		    }
1650		}
1651	    } elsif ($sel eq "a") {
1652		if ($val > 0 && $val <= $count) {
1653		    $authored{$val - 1} = !$authored{$val - 1};
1654		} elsif ($str eq '*' || $str eq '^') {
1655		    my $toggle = 0;
1656		    $toggle = 1 if ($str eq '*');
1657		    for (my $i = 0; $i < $count; $i++) {
1658			$authored{$i} = $toggle;
1659		    }
1660		}
1661	    } elsif ($sel eq "s") {
1662		if ($val > 0 && $val <= $count) {
1663		    $signed{$val - 1} = !$signed{$val - 1};
1664		} elsif ($str eq '*' || $str eq '^') {
1665		    my $toggle = 0;
1666		    $toggle = 1 if ($str eq '*');
1667		    for (my $i = 0; $i < $count; $i++) {
1668			$signed{$i} = $toggle;
1669		    }
1670		}
1671	    } elsif ($sel eq "o") {
1672		$print_options = 1;
1673		$redraw = 1;
1674	    } elsif ($sel eq "g") {
1675		if ($str eq "f") {
1676		    bool_invert(\$email_git_fallback);
1677		} else {
1678		    bool_invert(\$email_git);
1679		}
1680		$rerun = 1;
1681	    } elsif ($sel eq "b") {
1682		if ($str eq "s") {
1683		    bool_invert(\$email_git_blame_signatures);
1684		} else {
1685		    bool_invert(\$email_git_blame);
1686		}
1687		$rerun = 1;
1688	    } elsif ($sel eq "c") {
1689		if ($val > 0) {
1690		    $email_git_min_signatures = $val;
1691		    $rerun = 1;
1692		}
1693	    } elsif ($sel eq "x") {
1694		if ($val > 0) {
1695		    $email_git_max_maintainers = $val;
1696		    $rerun = 1;
1697		}
1698	    } elsif ($sel eq "%") {
1699		if ($str ne "" && $val >= 0) {
1700		    $email_git_min_percent = $val;
1701		    $rerun = 1;
1702		}
1703	    } elsif ($sel eq "d") {
1704		if (vcs_is_git()) {
1705		    $email_git_since = $str;
1706		} elsif (vcs_is_hg()) {
1707		    $email_hg_since = $str;
1708		}
1709		$rerun = 1;
1710	    } elsif ($sel eq "t") {
1711		bool_invert(\$email_git_all_signature_types);
1712		$rerun = 1;
1713	    } elsif ($sel eq "f") {
1714		bool_invert(\$file_emails);
1715		$rerun = 1;
1716	    } elsif ($sel eq "r") {
1717		bool_invert(\$email_remove_duplicates);
1718		$rerun = 1;
1719	    } elsif ($sel eq "m") {
1720		bool_invert(\$email_use_mailmap);
1721		read_mailmap();
1722		$rerun = 1;
1723	    } elsif ($sel eq "k") {
1724		bool_invert(\$keywords);
1725		$rerun = 1;
1726	    } elsif ($sel eq "p") {
1727		if ($str ne "" && $val >= 0) {
1728		    $pattern_depth = $val;
1729		    $rerun = 1;
1730		}
1731	    } elsif ($sel eq "h" || $sel eq "?") {
1732		print STDERR <<EOT
1733
1734Interactive mode allows you to select the various maintainers, submitters,
1735commit signers and mailing lists that could be CC'd on a patch.
1736
1737Any *'d entry is selected.
1738
1739If you have git or hg installed, you can choose to summarize the commit
1740history of files in the patch.  Also, each line of the current file can
1741be matched to its commit author and that commits signers with blame.
1742
1743Various knobs exist to control the length of time for active commit
1744tracking, the maximum number of commit authors and signers to add,
1745and such.
1746
1747Enter selections at the prompt until you are satisfied that the selected
1748maintainers are appropriate.  You may enter multiple selections separated
1749by either commas or spaces.
1750
1751EOT
1752	    } else {
1753		print STDERR "invalid option: '$nr'\n";
1754		$redraw = 0;
1755	    }
1756	}
1757	if ($rerun) {
1758	    print STDERR "git-blame can be very slow, please have patience..."
1759		if ($email_git_blame);
1760	    goto &get_maintainers;
1761	}
1762    }
1763
1764    #drop not selected entries
1765    $count = 0;
1766    my @new_emailto = ();
1767    foreach my $entry (@list) {
1768	if ($selected{$count}) {
1769	    push(@new_emailto, $list[$count]);
1770	}
1771	$count++;
1772    }
1773    return @new_emailto;
1774}
1775
1776sub bool_invert {
1777    my ($bool_ref) = @_;
1778
1779    if ($$bool_ref) {
1780	$$bool_ref = 0;
1781    } else {
1782	$$bool_ref = 1;
1783    }
1784}
1785
1786sub deduplicate_email {
1787    my ($email) = @_;
1788
1789    my $matched = 0;
1790    my ($name, $address) = parse_email($email);
1791    $email = format_email($name, $address, 1);
1792    $email = mailmap_email($email);
1793
1794    return $email if (!$email_remove_duplicates);
1795
1796    ($name, $address) = parse_email($email);
1797
1798    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
1799	$name = $deduplicate_name_hash{lc($name)}->[0];
1800	$address = $deduplicate_name_hash{lc($name)}->[1];
1801	$matched = 1;
1802    } elsif ($deduplicate_address_hash{lc($address)}) {
1803	$name = $deduplicate_address_hash{lc($address)}->[0];
1804	$address = $deduplicate_address_hash{lc($address)}->[1];
1805	$matched = 1;
1806    }
1807    if (!$matched) {
1808	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
1809	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
1810    }
1811    $email = format_email($name, $address, 1);
1812    $email = mailmap_email($email);
1813    return $email;
1814}
1815
1816sub save_commits_by_author {
1817    my (@lines) = @_;
1818
1819    my @authors = ();
1820    my @commits = ();
1821    my @subjects = ();
1822
1823    foreach my $line (@lines) {
1824	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1825	    my $author = $1;
1826	    $author = deduplicate_email($author);
1827	    push(@authors, $author);
1828	}
1829	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
1830	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
1831    }
1832
1833    for (my $i = 0; $i < @authors; $i++) {
1834	my $exists = 0;
1835	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
1836	    if (@{$ref}[0] eq $commits[$i] &&
1837		@{$ref}[1] eq $subjects[$i]) {
1838		$exists = 1;
1839		last;
1840	    }
1841	}
1842	if (!$exists) {
1843	    push(@{$commit_author_hash{$authors[$i]}},
1844		 [ ($commits[$i], $subjects[$i]) ]);
1845	}
1846    }
1847}
1848
1849sub save_commits_by_signer {
1850    my (@lines) = @_;
1851
1852    my $commit = "";
1853    my $subject = "";
1854
1855    foreach my $line (@lines) {
1856	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
1857	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
1858	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
1859	    my @signatures = ($line);
1860	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1861	    my @types = @$types_ref;
1862	    my @signers = @$signers_ref;
1863
1864	    my $type = $types[0];
1865	    my $signer = $signers[0];
1866
1867	    $signer = deduplicate_email($signer);
1868
1869	    my $exists = 0;
1870	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
1871		if (@{$ref}[0] eq $commit &&
1872		    @{$ref}[1] eq $subject &&
1873		    @{$ref}[2] eq $type) {
1874		    $exists = 1;
1875		    last;
1876		}
1877	    }
1878	    if (!$exists) {
1879		push(@{$commit_signer_hash{$signer}},
1880		     [ ($commit, $subject, $type) ]);
1881	    }
1882	}
1883    }
1884}
1885
1886sub vcs_assign {
1887    my ($role, $divisor, @lines) = @_;
1888
1889    my %hash;
1890    my $count = 0;
1891
1892    return if (@lines <= 0);
1893
1894    if ($divisor <= 0) {
1895	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
1896	$divisor = 1;
1897    }
1898
1899    @lines = mailmap(@lines);
1900
1901    return if (@lines <= 0);
1902
1903    @lines = sort(@lines);
1904
1905    # uniq -c
1906    $hash{$_}++ for @lines;
1907
1908    # sort -rn
1909    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
1910	my $sign_offs = $hash{$line};
1911	my $percent = $sign_offs * 100 / $divisor;
1912
1913	$percent = 100 if ($percent > 100);
1914	next if (ignore_email_address($line));
1915	$count++;
1916	last if ($sign_offs < $email_git_min_signatures ||
1917		 $count > $email_git_max_maintainers ||
1918		 $percent < $email_git_min_percent);
1919	push_email_address($line, '');
1920	if ($output_rolestats) {
1921	    my $fmt_percent = sprintf("%.0f", $percent);
1922	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
1923	} else {
1924	    add_role($line, $role);
1925	}
1926    }
1927}
1928
1929sub vcs_file_signoffs {
1930    my ($file) = @_;
1931
1932    my $authors_ref;
1933    my $signers_ref;
1934    my $stats_ref;
1935    my @authors = ();
1936    my @signers = ();
1937    my @stats = ();
1938    my $commits;
1939
1940    $vcs_used = vcs_exists();
1941    return if (!$vcs_used);
1942
1943    my $cmd = $VCS_cmds{"find_signers_cmd"};
1944    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
1945
1946    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
1947
1948    @signers = @{$signers_ref} if defined $signers_ref;
1949    @authors = @{$authors_ref} if defined $authors_ref;
1950    @stats = @{$stats_ref} if defined $stats_ref;
1951
1952#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
1953
1954    foreach my $signer (@signers) {
1955	$signer = deduplicate_email($signer);
1956    }
1957
1958    vcs_assign("commit_signer", $commits, @signers);
1959    vcs_assign("authored", $commits, @authors);
1960    if ($#authors == $#stats) {
1961	my $stat_pattern = $VCS_cmds{"stat_pattern"};
1962	$stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern
1963
1964	my $added = 0;
1965	my $deleted = 0;
1966	for (my $i = 0; $i <= $#stats; $i++) {
1967	    if ($stats[$i] =~ /$stat_pattern/) {
1968		$added += $1;
1969		$deleted += $2;
1970	    }
1971	}
1972	my @tmp_authors = uniq(@authors);
1973	foreach my $author (@tmp_authors) {
1974	    $author = deduplicate_email($author);
1975	}
1976	@tmp_authors = uniq(@tmp_authors);
1977	my @list_added = ();
1978	my @list_deleted = ();
1979	foreach my $author (@tmp_authors) {
1980	    my $auth_added = 0;
1981	    my $auth_deleted = 0;
1982	    for (my $i = 0; $i <= $#stats; $i++) {
1983		if ($author eq deduplicate_email($authors[$i]) &&
1984		    $stats[$i] =~ /$stat_pattern/) {
1985		    $auth_added += $1;
1986		    $auth_deleted += $2;
1987		}
1988	    }
1989	    for (my $i = 0; $i < $auth_added; $i++) {
1990		push(@list_added, $author);
1991	    }
1992	    for (my $i = 0; $i < $auth_deleted; $i++) {
1993		push(@list_deleted, $author);
1994	    }
1995	}
1996	vcs_assign("added_lines", $added, @list_added);
1997	vcs_assign("removed_lines", $deleted, @list_deleted);
1998    }
1999}
2000
2001sub vcs_file_blame {
2002    my ($file) = @_;
2003
2004    my @signers = ();
2005    my @all_commits = ();
2006    my @commits = ();
2007    my $total_commits;
2008    my $total_lines;
2009
2010    $vcs_used = vcs_exists();
2011    return if (!$vcs_used);
2012
2013    @all_commits = vcs_blame($file);
2014    @commits = uniq(@all_commits);
2015    $total_commits = @commits;
2016    $total_lines = @all_commits;
2017
2018    if ($email_git_blame_signatures) {
2019	if (vcs_is_hg()) {
2020	    my $commit_count;
2021	    my $commit_authors_ref;
2022	    my $commit_signers_ref;
2023	    my $stats_ref;
2024	    my @commit_authors = ();
2025	    my @commit_signers = ();
2026	    my $commit = join(" -r ", @commits);
2027	    my $cmd;
2028
2029	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
2030	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2031
2032	    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2033	    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2034	    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2035
2036	    push(@signers, @commit_signers);
2037	} else {
2038	    foreach my $commit (@commits) {
2039		my $commit_count;
2040		my $commit_authors_ref;
2041		my $commit_signers_ref;
2042		my $stats_ref;
2043		my @commit_authors = ();
2044		my @commit_signers = ();
2045		my $cmd;
2046
2047		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
2048		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2049
2050		($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2051		@commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2052		@commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2053
2054		push(@signers, @commit_signers);
2055	    }
2056	}
2057    }
2058
2059    if ($from_filename) {
2060	if ($output_rolestats) {
2061	    my @blame_signers;
2062	    if (vcs_is_hg()) {{		# Double brace for last exit
2063		my $commit_count;
2064		my @commit_signers = ();
2065		@commits = uniq(@commits);
2066		@commits = sort(@commits);
2067		my $commit = join(" -r ", @commits);
2068		my $cmd;
2069
2070		$cmd = $VCS_cmds{"find_commit_author_cmd"};
2071		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2072
2073		my @lines = ();
2074
2075		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
2076
2077		if (!$email_git_penguin_chiefs) {
2078		    @lines = grep(!/${penguin_chiefs}/i, @lines);
2079		}
2080
2081		last if !@lines;
2082
2083		my @authors = ();
2084		foreach my $line (@lines) {
2085		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2086			my $author = $1;
2087			$author = deduplicate_email($author);
2088			push(@authors, $author);
2089		    }
2090		}
2091
2092		save_commits_by_author(@lines) if ($interactive);
2093		save_commits_by_signer(@lines) if ($interactive);
2094
2095		push(@signers, @authors);
2096	    }}
2097	    else {
2098		foreach my $commit (@commits) {
2099		    my $i;
2100		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2101		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2102		    my @author = vcs_find_author($cmd);
2103		    next if !@author;
2104
2105		    my $formatted_author = deduplicate_email($author[0]);
2106
2107		    my $count = grep(/$commit/, @all_commits);
2108		    for ($i = 0; $i < $count ; $i++) {
2109			push(@blame_signers, $formatted_author);
2110		    }
2111		}
2112	    }
2113	    if (@blame_signers) {
2114		vcs_assign("authored lines", $total_lines, @blame_signers);
2115	    }
2116	}
2117	foreach my $signer (@signers) {
2118	    $signer = deduplicate_email($signer);
2119	}
2120	vcs_assign("commits", $total_commits, @signers);
2121    } else {
2122	foreach my $signer (@signers) {
2123	    $signer = deduplicate_email($signer);
2124	}
2125	vcs_assign("modified commits", $total_commits, @signers);
2126    }
2127}
2128
2129sub vcs_file_exists {
2130    my ($file) = @_;
2131
2132    my $exists;
2133
2134    my $vcs_used = vcs_exists();
2135    return 0 if (!$vcs_used);
2136
2137    my $cmd = $VCS_cmds{"file_exists_cmd"};
2138    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
2139    $cmd .= " 2>&1";
2140    $exists = &{$VCS_cmds{"execute_cmd"}}($cmd);
2141
2142    return 0 if ($? != 0);
2143
2144    return $exists;
2145}
2146
2147sub uniq {
2148    my (@parms) = @_;
2149
2150    my %saw;
2151    @parms = grep(!$saw{$_}++, @parms);
2152    return @parms;
2153}
2154
2155sub sort_and_uniq {
2156    my (@parms) = @_;
2157
2158    my %saw;
2159    @parms = sort @parms;
2160    @parms = grep(!$saw{$_}++, @parms);
2161    return @parms;
2162}
2163
2164sub clean_file_emails {
2165    my (@file_emails) = @_;
2166    my @fmt_emails = ();
2167
2168    foreach my $email (@file_emails) {
2169	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2170	my ($name, $address) = parse_email($email);
2171	if ($name eq '"[,\.]"') {
2172	    $name = "";
2173	}
2174
2175	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
2176	if (@nw > 2) {
2177	    my $first = $nw[@nw - 3];
2178	    my $middle = $nw[@nw - 2];
2179	    my $last = $nw[@nw - 1];
2180
2181	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
2182		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2183		(length($middle) == 1 ||
2184		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2185		$name = "$first $middle $last";
2186	    } else {
2187		$name = "$middle $last";
2188	    }
2189	}
2190
2191	if (substr($name, -1) =~ /[,\.]/) {
2192	    $name = substr($name, 0, length($name) - 1);
2193	} elsif (substr($name, -2) =~ /[,\.]"/) {
2194	    $name = substr($name, 0, length($name) - 2) . '"';
2195	}
2196
2197	if (substr($name, 0, 1) =~ /[,\.]/) {
2198	    $name = substr($name, 1, length($name) - 1);
2199	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
2200	    $name = '"' . substr($name, 2, length($name) - 2);
2201	}
2202
2203	my $fmt_email = format_email($name, $address, $email_usename);
2204	push(@fmt_emails, $fmt_email);
2205    }
2206    return @fmt_emails;
2207}
2208
2209sub merge_email {
2210    my @lines;
2211    my %saw;
2212
2213    for (@_) {
2214	my ($address, $role) = @$_;
2215	if (!$saw{$address}) {
2216	    if ($output_roles) {
2217		push(@lines, "$address ($role)");
2218	    } else {
2219		push(@lines, $address);
2220	    }
2221	    $saw{$address} = 1;
2222	}
2223    }
2224
2225    return @lines;
2226}
2227
2228sub output {
2229    my (@parms) = @_;
2230
2231    if ($output_multiline) {
2232	foreach my $line (@parms) {
2233	    print("${line}\n");
2234	}
2235    } else {
2236	print(join($output_separator, @parms));
2237	print("\n");
2238    }
2239}
2240
2241my $rfc822re;
2242
2243sub make_rfc822re {
2244#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2245#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2246#   This regexp will only work on addresses which have had comments stripped
2247#   and replaced with rfc822_lwsp.
2248
2249    my $specials = '()<>@,;:\\\\".\\[\\]';
2250    my $controls = '\\000-\\037\\177';
2251
2252    my $dtext = "[^\\[\\]\\r\\\\]";
2253    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2254
2255    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2256
2257#   Use zero-width assertion to spot the limit of an atom.  A simple
2258#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2259    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2260    my $word = "(?:$atom|$quoted_string)";
2261    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2262
2263    my $sub_domain = "(?:$atom|$domain_literal)";
2264    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2265
2266    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2267
2268    my $phrase = "$word*";
2269    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2270    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2271    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2272
2273    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2274    my $address = "(?:$mailbox|$group)";
2275
2276    return "$rfc822_lwsp*$address";
2277}
2278
2279sub rfc822_strip_comments {
2280    my $s = shift;
2281#   Recursively remove comments, and replace with a single space.  The simpler
2282#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2283#   chars in atoms, for example.
2284
2285    while ($s =~ s/^((?:[^"\\]|\\.)*
2286                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2287                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2288    return $s;
2289}
2290
2291#   valid: returns true if the parameter is an RFC822 valid address
2292#
2293sub rfc822_valid {
2294    my $s = rfc822_strip_comments(shift);
2295
2296    if (!$rfc822re) {
2297        $rfc822re = make_rfc822re();
2298    }
2299
2300    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2301}
2302
2303#   validlist: In scalar context, returns true if the parameter is an RFC822
2304#              valid list of addresses.
2305#
2306#              In list context, returns an empty list on failure (an invalid
2307#              address was found); otherwise a list whose first element is the
2308#              number of addresses found and whose remaining elements are the
2309#              addresses.  This is needed to disambiguate failure (invalid)
2310#              from success with no addresses found, because an empty string is
2311#              a valid list.
2312
2313sub rfc822_validlist {
2314    my $s = rfc822_strip_comments(shift);
2315
2316    if (!$rfc822re) {
2317        $rfc822re = make_rfc822re();
2318    }
2319    # * null list items are valid according to the RFC
2320    # * the '1' business is to aid in distinguishing failure from no results
2321
2322    my @r;
2323    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2324	$s =~ m/^$rfc822_char*$/) {
2325        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2326            push(@r, $1);
2327        }
2328        return wantarray ? (scalar(@r), @r) : 1;
2329    }
2330    return wantarray ? () : 0;
2331}
2332