xref: /openbmc/linux/scripts/get_maintainer.pl (revision 9b799b78)
1#!/usr/bin/perl -w
2# (c) 2007, Joe Perches <joe@perches.com>
3#           created from checkpatch.pl
4#
5# Print selected MAINTAINERS information for
6# the files modified in a patch or for a file
7#
8# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
9#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
10#
11# Licensed under the terms of the GNU GPL License version 2
12
13use strict;
14
15my $P = $0;
16my $V = '0.26';
17
18use Getopt::Long qw(:config no_auto_abbrev);
19
20my $lk_path = "./";
21my $email = 1;
22my $email_usename = 1;
23my $email_maintainer = 1;
24my $email_reviewer = 1;
25my $email_list = 1;
26my $email_subscriber_list = 0;
27my $email_git_penguin_chiefs = 0;
28my $email_git = 0;
29my $email_git_all_signature_types = 0;
30my $email_git_blame = 0;
31my $email_git_blame_signatures = 1;
32my $email_git_fallback = 1;
33my $email_git_min_signatures = 1;
34my $email_git_max_maintainers = 5;
35my $email_git_min_percent = 5;
36my $email_git_since = "1-year-ago";
37my $email_hg_since = "-365";
38my $interactive = 0;
39my $email_remove_duplicates = 1;
40my $email_use_mailmap = 1;
41my $output_multiline = 1;
42my $output_separator = ", ";
43my $output_roles = 0;
44my $output_rolestats = 1;
45my $scm = 0;
46my $web = 0;
47my $subsystem = 0;
48my $status = 0;
49my $keywords = 1;
50my $sections = 0;
51my $file_emails = 0;
52my $from_filename = 0;
53my $pattern_depth = 0;
54my $version = 0;
55my $help = 0;
56
57my $vcs_used = 0;
58
59my $exit = 0;
60
61my %commit_author_hash;
62my %commit_signer_hash;
63
64my @penguin_chief = ();
65push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
66#Andrew wants in on most everything - 2009/01/14
67#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
68
69my @penguin_chief_names = ();
70foreach my $chief (@penguin_chief) {
71    if ($chief =~ m/^(.*):(.*)/) {
72	my $chief_name = $1;
73	my $chief_addr = $2;
74	push(@penguin_chief_names, $chief_name);
75    }
76}
77my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
78
79# Signature types of people who are either
80# 	a) responsible for the code in question, or
81# 	b) familiar enough with it to give relevant feedback
82my @signature_tags = ();
83push(@signature_tags, "Signed-off-by:");
84push(@signature_tags, "Reviewed-by:");
85push(@signature_tags, "Acked-by:");
86
87my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
88
89# rfc822 email address - preloaded methods go here.
90my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
91my $rfc822_char = '[\\000-\\377]';
92
93# VCS command support: class-like functions and strings
94
95my %VCS_cmds;
96
97my %VCS_cmds_git = (
98    "execute_cmd" => \&git_execute_cmd,
99    "available" => '(which("git") ne "") && (-e ".git")',
100    "find_signers_cmd" =>
101	"git log --no-color --follow --since=\$email_git_since " .
102	    '--numstat --no-merges ' .
103	    '--format="GitCommit: %H%n' .
104		      'GitAuthor: %an <%ae>%n' .
105		      'GitDate: %aD%n' .
106		      'GitSubject: %s%n' .
107		      '%b%n"' .
108	    " -- \$file",
109    "find_commit_signers_cmd" =>
110	"git log --no-color " .
111	    '--numstat ' .
112	    '--format="GitCommit: %H%n' .
113		      'GitAuthor: %an <%ae>%n' .
114		      'GitDate: %aD%n' .
115		      'GitSubject: %s%n' .
116		      '%b%n"' .
117	    " -1 \$commit",
118    "find_commit_author_cmd" =>
119	"git log --no-color " .
120	    '--numstat ' .
121	    '--format="GitCommit: %H%n' .
122		      'GitAuthor: %an <%ae>%n' .
123		      'GitDate: %aD%n' .
124		      'GitSubject: %s%n"' .
125	    " -1 \$commit",
126    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
127    "blame_file_cmd" => "git blame -l \$file",
128    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
129    "blame_commit_pattern" => "^([0-9a-f]+) ",
130    "author_pattern" => "^GitAuthor: (.*)",
131    "subject_pattern" => "^GitSubject: (.*)",
132    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
133);
134
135my %VCS_cmds_hg = (
136    "execute_cmd" => \&hg_execute_cmd,
137    "available" => '(which("hg") ne "") && (-d ".hg")',
138    "find_signers_cmd" =>
139	"hg log --date=\$email_hg_since " .
140	    "--template='HgCommit: {node}\\n" .
141	                "HgAuthor: {author}\\n" .
142			"HgSubject: {desc}\\n'" .
143	    " -- \$file",
144    "find_commit_signers_cmd" =>
145	"hg log " .
146	    "--template='HgSubject: {desc}\\n'" .
147	    " -r \$commit",
148    "find_commit_author_cmd" =>
149	"hg log " .
150	    "--template='HgCommit: {node}\\n" .
151		        "HgAuthor: {author}\\n" .
152			"HgSubject: {desc|firstline}\\n'" .
153	    " -r \$commit",
154    "blame_range_cmd" => "",		# not supported
155    "blame_file_cmd" => "hg blame -n \$file",
156    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
157    "blame_commit_pattern" => "^([ 0-9a-f]+):",
158    "author_pattern" => "^HgAuthor: (.*)",
159    "subject_pattern" => "^HgSubject: (.*)",
160    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
161);
162
163my $conf = which_conf(".get_maintainer.conf");
164if (-f $conf) {
165    my @conf_args;
166    open(my $conffile, '<', "$conf")
167	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
168
169    while (<$conffile>) {
170	my $line = $_;
171
172	$line =~ s/\s*\n?$//g;
173	$line =~ s/^\s*//g;
174	$line =~ s/\s+/ /g;
175
176	next if ($line =~ m/^\s*#/);
177	next if ($line =~ m/^\s*$/);
178
179	my @words = split(" ", $line);
180	foreach my $word (@words) {
181	    last if ($word =~ m/^#/);
182	    push (@conf_args, $word);
183	}
184    }
185    close($conffile);
186    unshift(@ARGV, @conf_args) if @conf_args;
187}
188
189if (!GetOptions(
190		'email!' => \$email,
191		'git!' => \$email_git,
192		'git-all-signature-types!' => \$email_git_all_signature_types,
193		'git-blame!' => \$email_git_blame,
194		'git-blame-signatures!' => \$email_git_blame_signatures,
195		'git-fallback!' => \$email_git_fallback,
196		'git-chief-penguins!' => \$email_git_penguin_chiefs,
197		'git-min-signatures=i' => \$email_git_min_signatures,
198		'git-max-maintainers=i' => \$email_git_max_maintainers,
199		'git-min-percent=i' => \$email_git_min_percent,
200		'git-since=s' => \$email_git_since,
201		'hg-since=s' => \$email_hg_since,
202		'i|interactive!' => \$interactive,
203		'remove-duplicates!' => \$email_remove_duplicates,
204		'mailmap!' => \$email_use_mailmap,
205		'm!' => \$email_maintainer,
206		'r!' => \$email_reviewer,
207		'n!' => \$email_usename,
208		'l!' => \$email_list,
209		's!' => \$email_subscriber_list,
210		'multiline!' => \$output_multiline,
211		'roles!' => \$output_roles,
212		'rolestats!' => \$output_rolestats,
213		'separator=s' => \$output_separator,
214		'subsystem!' => \$subsystem,
215		'status!' => \$status,
216		'scm!' => \$scm,
217		'web!' => \$web,
218		'pattern-depth=i' => \$pattern_depth,
219		'k|keywords!' => \$keywords,
220		'sections!' => \$sections,
221		'fe|file-emails!' => \$file_emails,
222		'f|file' => \$from_filename,
223		'v|version' => \$version,
224		'h|help|usage' => \$help,
225		)) {
226    die "$P: invalid argument - use --help if necessary\n";
227}
228
229if ($help != 0) {
230    usage();
231    exit 0;
232}
233
234if ($version != 0) {
235    print("${P} ${V}\n");
236    exit 0;
237}
238
239if (-t STDIN && !@ARGV) {
240    # We're talking to a terminal, but have no command line arguments.
241    die "$P: missing patchfile or -f file - use --help if necessary\n";
242}
243
244$output_multiline = 0 if ($output_separator ne ", ");
245$output_rolestats = 1 if ($interactive);
246$output_roles = 1 if ($output_rolestats);
247
248if ($sections) {
249    $email = 0;
250    $email_list = 0;
251    $scm = 0;
252    $status = 0;
253    $subsystem = 0;
254    $web = 0;
255    $keywords = 0;
256    $interactive = 0;
257} else {
258    my $selections = $email + $scm + $status + $subsystem + $web;
259    if ($selections == 0) {
260	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
261    }
262}
263
264if ($email &&
265    ($email_maintainer + $email_reviewer +
266     $email_list + $email_subscriber_list +
267     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
268    die "$P: Please select at least 1 email option\n";
269}
270
271if (!top_of_kernel_tree($lk_path)) {
272    die "$P: The current directory does not appear to be "
273	. "a linux kernel source tree.\n";
274}
275
276## Read MAINTAINERS for type/value pairs
277
278my @typevalue = ();
279my %keyword_hash;
280
281open (my $maint, '<', "${lk_path}MAINTAINERS")
282    or die "$P: Can't open MAINTAINERS: $!\n";
283while (<$maint>) {
284    my $line = $_;
285
286    if ($line =~ m/^(\C):\s*(.*)/) {
287	my $type = $1;
288	my $value = $2;
289
290	##Filename pattern matching
291	if ($type eq "F" || $type eq "X") {
292	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
293	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
294	    $value =~ s/\?/\./g;         ##Convert ? to .
295	    ##if pattern is a directory and it lacks a trailing slash, add one
296	    if ((-d $value)) {
297		$value =~ s@([^/])$@$1/@;
298	    }
299	} elsif ($type eq "K") {
300	    $keyword_hash{@typevalue} = $value;
301	}
302	push(@typevalue, "$type:$value");
303    } elsif (!/^(\s)*$/) {
304	$line =~ s/\n$//g;
305	push(@typevalue, $line);
306    }
307}
308close($maint);
309
310
311#
312# Read mail address map
313#
314
315my $mailmap;
316
317read_mailmap();
318
319sub read_mailmap {
320    $mailmap = {
321	names => {},
322	addresses => {}
323    };
324
325    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
326
327    open(my $mailmap_file, '<', "${lk_path}.mailmap")
328	or warn "$P: Can't open .mailmap: $!\n";
329
330    while (<$mailmap_file>) {
331	s/#.*$//; #strip comments
332	s/^\s+|\s+$//g; #trim
333
334	next if (/^\s*$/); #skip empty lines
335	#entries have one of the following formats:
336	# name1 <mail1>
337	# <mail1> <mail2>
338	# name1 <mail1> <mail2>
339	# name1 <mail1> name2 <mail2>
340	# (see man git-shortlog)
341
342	if (/^([^<]+)<([^>]+)>$/) {
343	    my $real_name = $1;
344	    my $address = $2;
345
346	    $real_name =~ s/\s+$//;
347	    ($real_name, $address) = parse_email("$real_name <$address>");
348	    $mailmap->{names}->{$address} = $real_name;
349
350	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
351	    my $real_address = $1;
352	    my $wrong_address = $2;
353
354	    $mailmap->{addresses}->{$wrong_address} = $real_address;
355
356	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
357	    my $real_name = $1;
358	    my $real_address = $2;
359	    my $wrong_address = $3;
360
361	    $real_name =~ s/\s+$//;
362	    ($real_name, $real_address) =
363		parse_email("$real_name <$real_address>");
364	    $mailmap->{names}->{$wrong_address} = $real_name;
365	    $mailmap->{addresses}->{$wrong_address} = $real_address;
366
367	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
368	    my $real_name = $1;
369	    my $real_address = $2;
370	    my $wrong_name = $3;
371	    my $wrong_address = $4;
372
373	    $real_name =~ s/\s+$//;
374	    ($real_name, $real_address) =
375		parse_email("$real_name <$real_address>");
376
377	    $wrong_name =~ s/\s+$//;
378	    ($wrong_name, $wrong_address) =
379		parse_email("$wrong_name <$wrong_address>");
380
381	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
382	    $mailmap->{names}->{$wrong_email} = $real_name;
383	    $mailmap->{addresses}->{$wrong_email} = $real_address;
384	}
385    }
386    close($mailmap_file);
387}
388
389## use the filenames on the command line or find the filenames in the patchfiles
390
391my @files = ();
392my @range = ();
393my @keyword_tvi = ();
394my @file_emails = ();
395
396if (!@ARGV) {
397    push(@ARGV, "&STDIN");
398}
399
400foreach my $file (@ARGV) {
401    if ($file ne "&STDIN") {
402	##if $file is a directory and it lacks a trailing slash, add one
403	if ((-d $file)) {
404	    $file =~ s@([^/])$@$1/@;
405	} elsif (!(-f $file)) {
406	    die "$P: file '${file}' not found\n";
407	}
408    }
409    if ($from_filename) {
410	push(@files, $file);
411	if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
412	    open(my $f, '<', $file)
413		or die "$P: Can't open $file: $!\n";
414	    my $text = do { local($/) ; <$f> };
415	    close($f);
416	    if ($keywords) {
417		foreach my $line (keys %keyword_hash) {
418		    if ($text =~ m/$keyword_hash{$line}/x) {
419			push(@keyword_tvi, $line);
420		    }
421		}
422	    }
423	    if ($file_emails) {
424		my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
425		push(@file_emails, clean_file_emails(@poss_addr));
426	    }
427	}
428    } else {
429	my $file_cnt = @files;
430	my $lastfile;
431
432	open(my $patch, "< $file")
433	    or die "$P: Can't open $file: $!\n";
434
435	# We can check arbitrary information before the patch
436	# like the commit message, mail headers, etc...
437	# This allows us to match arbitrary keywords against any part
438	# of a git format-patch generated file (subject tags, etc...)
439
440	my $patch_prefix = "";			#Parsing the intro
441
442	while (<$patch>) {
443	    my $patch_line = $_;
444	    if (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
445		my $filename = $1;
446		$filename =~ s@^[^/]*/@@;
447		$filename =~ s@\n@@;
448		$lastfile = $filename;
449		push(@files, $filename);
450		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
451	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
452		if ($email_git_blame) {
453		    push(@range, "$lastfile:$1:$2");
454		}
455	    } elsif ($keywords) {
456		foreach my $line (keys %keyword_hash) {
457		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
458			push(@keyword_tvi, $line);
459		    }
460		}
461	    }
462	}
463	close($patch);
464
465	if ($file_cnt == @files) {
466	    warn "$P: file '${file}' doesn't appear to be a patch.  "
467		. "Add -f to options?\n";
468	}
469	@files = sort_and_uniq(@files);
470    }
471}
472
473@file_emails = uniq(@file_emails);
474
475my %email_hash_name;
476my %email_hash_address;
477my @email_to = ();
478my %hash_list_to;
479my @list_to = ();
480my @scm = ();
481my @web = ();
482my @subsystem = ();
483my @status = ();
484my %deduplicate_name_hash = ();
485my %deduplicate_address_hash = ();
486
487my @maintainers = get_maintainers();
488
489if (@maintainers) {
490    @maintainers = merge_email(@maintainers);
491    output(@maintainers);
492}
493
494if ($scm) {
495    @scm = uniq(@scm);
496    output(@scm);
497}
498
499if ($status) {
500    @status = uniq(@status);
501    output(@status);
502}
503
504if ($subsystem) {
505    @subsystem = uniq(@subsystem);
506    output(@subsystem);
507}
508
509if ($web) {
510    @web = uniq(@web);
511    output(@web);
512}
513
514exit($exit);
515
516sub range_is_maintained {
517    my ($start, $end) = @_;
518
519    for (my $i = $start; $i < $end; $i++) {
520	my $line = $typevalue[$i];
521	if ($line =~ m/^(\C):\s*(.*)/) {
522	    my $type = $1;
523	    my $value = $2;
524	    if ($type eq 'S') {
525		if ($value =~ /(maintain|support)/i) {
526		    return 1;
527		}
528	    }
529	}
530    }
531    return 0;
532}
533
534sub range_has_maintainer {
535    my ($start, $end) = @_;
536
537    for (my $i = $start; $i < $end; $i++) {
538	my $line = $typevalue[$i];
539	if ($line =~ m/^(\C):\s*(.*)/) {
540	    my $type = $1;
541	    my $value = $2;
542	    if ($type eq 'M') {
543		return 1;
544	    }
545	}
546    }
547    return 0;
548}
549
550sub get_maintainers {
551    %email_hash_name = ();
552    %email_hash_address = ();
553    %commit_author_hash = ();
554    %commit_signer_hash = ();
555    @email_to = ();
556    %hash_list_to = ();
557    @list_to = ();
558    @scm = ();
559    @web = ();
560    @subsystem = ();
561    @status = ();
562    %deduplicate_name_hash = ();
563    %deduplicate_address_hash = ();
564    if ($email_git_all_signature_types) {
565	$signature_pattern = "(.+?)[Bb][Yy]:";
566    } else {
567	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
568    }
569
570    # Find responsible parties
571
572    my %exact_pattern_match_hash = ();
573
574    foreach my $file (@files) {
575
576	my %hash;
577	my $tvi = find_first_section();
578	while ($tvi < @typevalue) {
579	    my $start = find_starting_index($tvi);
580	    my $end = find_ending_index($tvi);
581	    my $exclude = 0;
582	    my $i;
583
584	    #Do not match excluded file patterns
585
586	    for ($i = $start; $i < $end; $i++) {
587		my $line = $typevalue[$i];
588		if ($line =~ m/^(\C):\s*(.*)/) {
589		    my $type = $1;
590		    my $value = $2;
591		    if ($type eq 'X') {
592			if (file_match_pattern($file, $value)) {
593			    $exclude = 1;
594			    last;
595			}
596		    }
597		}
598	    }
599
600	    if (!$exclude) {
601		for ($i = $start; $i < $end; $i++) {
602		    my $line = $typevalue[$i];
603		    if ($line =~ m/^(\C):\s*(.*)/) {
604			my $type = $1;
605			my $value = $2;
606			if ($type eq 'F') {
607			    if (file_match_pattern($file, $value)) {
608				my $value_pd = ($value =~ tr@/@@);
609				my $file_pd = ($file  =~ tr@/@@);
610				$value_pd++ if (substr($value,-1,1) ne "/");
611				$value_pd = -1 if ($value =~ /^\.\*/);
612				if ($value_pd >= $file_pd &&
613				    range_is_maintained($start, $end) &&
614				    range_has_maintainer($start, $end)) {
615				    $exact_pattern_match_hash{$file} = 1;
616				}
617				if ($pattern_depth == 0 ||
618				    (($file_pd - $value_pd) < $pattern_depth)) {
619				    $hash{$tvi} = $value_pd;
620				}
621			    }
622			} elsif ($type eq 'N') {
623			    if ($file =~ m/$value/x) {
624				$hash{$tvi} = 0;
625			    }
626			}
627		    }
628		}
629	    }
630	    $tvi = $end + 1;
631	}
632
633	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
634	    add_categories($line);
635	    if ($sections) {
636		my $i;
637		my $start = find_starting_index($line);
638		my $end = find_ending_index($line);
639		for ($i = $start; $i < $end; $i++) {
640		    my $line = $typevalue[$i];
641		    if ($line =~ /^[FX]:/) {		##Restore file patterns
642			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
643			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
644			$line =~ s/\\\./\./g;       	##Convert \. to .
645			$line =~ s/\.\*/\*/g;       	##Convert .* to *
646		    }
647		    $line =~ s/^([A-Z]):/$1:\t/g;
648		    print("$line\n");
649		}
650		print("\n");
651	    }
652	}
653    }
654
655    if ($keywords) {
656	@keyword_tvi = sort_and_uniq(@keyword_tvi);
657	foreach my $line (@keyword_tvi) {
658	    add_categories($line);
659	}
660    }
661
662    foreach my $email (@email_to, @list_to) {
663	$email->[0] = deduplicate_email($email->[0]);
664    }
665
666    foreach my $file (@files) {
667	if ($email &&
668	    ($email_git || ($email_git_fallback &&
669			    !$exact_pattern_match_hash{$file}))) {
670	    vcs_file_signoffs($file);
671	}
672	if ($email && $email_git_blame) {
673	    vcs_file_blame($file);
674	}
675    }
676
677    if ($email) {
678	foreach my $chief (@penguin_chief) {
679	    if ($chief =~ m/^(.*):(.*)/) {
680		my $email_address;
681
682		$email_address = format_email($1, $2, $email_usename);
683		if ($email_git_penguin_chiefs) {
684		    push(@email_to, [$email_address, 'chief penguin']);
685		} else {
686		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
687		}
688	    }
689	}
690
691	foreach my $email (@file_emails) {
692	    my ($name, $address) = parse_email($email);
693
694	    my $tmp_email = format_email($name, $address, $email_usename);
695	    push_email_address($tmp_email, '');
696	    add_role($tmp_email, 'in file');
697	}
698    }
699
700    my @to = ();
701    if ($email || $email_list) {
702	if ($email) {
703	    @to = (@to, @email_to);
704	}
705	if ($email_list) {
706	    @to = (@to, @list_to);
707	}
708    }
709
710    if ($interactive) {
711	@to = interactive_get_maintainers(\@to);
712    }
713
714    return @to;
715}
716
717sub file_match_pattern {
718    my ($file, $pattern) = @_;
719    if (substr($pattern, -1) eq "/") {
720	if ($file =~ m@^$pattern@) {
721	    return 1;
722	}
723    } else {
724	if ($file =~ m@^$pattern@) {
725	    my $s1 = ($file =~ tr@/@@);
726	    my $s2 = ($pattern =~ tr@/@@);
727	    if ($s1 == $s2) {
728		return 1;
729	    }
730	}
731    }
732    return 0;
733}
734
735sub usage {
736    print <<EOT;
737usage: $P [options] patchfile
738       $P [options] -f file|directory
739version: $V
740
741MAINTAINER field selection options:
742  --email => print email address(es) if any
743    --git => include recent git \*-by: signers
744    --git-all-signature-types => include signers regardless of signature type
745        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
746    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
747    --git-chief-penguins => include ${penguin_chiefs}
748    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
749    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
750    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
751    --git-blame => use git blame to find modified commits for patch or file
752    --git-since => git history to use (default: $email_git_since)
753    --hg-since => hg history to use (default: $email_hg_since)
754    --interactive => display a menu (mostly useful if used with the --git option)
755    --m => include maintainer(s) if any
756    --r => include reviewer(s) if any
757    --n => include name 'Full Name <addr\@domain.tld>'
758    --l => include list(s) if any
759    --s => include subscriber only list(s) if any
760    --remove-duplicates => minimize duplicate email names/addresses
761    --roles => show roles (status:subsystem, git-signer, list, etc...)
762    --rolestats => show roles and statistics (commits/total_commits, %)
763    --file-emails => add email addresses found in -f file (default: 0 (off))
764  --scm => print SCM tree(s) if any
765  --status => print status if any
766  --subsystem => print subsystem name if any
767  --web => print website(s) if any
768
769Output type options:
770  --separator [, ] => separator for multiple entries on 1 line
771    using --separator also sets --nomultiline if --separator is not [, ]
772  --multiline => print 1 entry per line
773
774Other options:
775  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
776  --keywords => scan patch for keywords (default: $keywords)
777  --sections => print all of the subsystem sections with pattern matches
778  --mailmap => use .mailmap file (default: $email_use_mailmap)
779  --version => show version
780  --help => show this help information
781
782Default options:
783  [--email --nogit --git-fallback --m --n --l --multiline -pattern-depth=0
784   --remove-duplicates --rolestats]
785
786Notes:
787  Using "-f directory" may give unexpected results:
788      Used with "--git", git signators for _all_ files in and below
789          directory are examined as git recurses directories.
790          Any specified X: (exclude) pattern matches are _not_ ignored.
791      Used with "--nogit", directory is used as a pattern match,
792          no individual file within the directory or subdirectory
793          is matched.
794      Used with "--git-blame", does not iterate all files in directory
795  Using "--git-blame" is slow and may add old committers and authors
796      that are no longer active maintainers to the output.
797  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
798      other automated tools that expect only ["name"] <email address>
799      may not work because of additional output after <email address>.
800  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
801      not the percentage of the entire file authored.  # of commits is
802      not a good measure of amount of code authored.  1 major commit may
803      contain a thousand lines, 5 trivial commits may modify a single line.
804  If git is not installed, but mercurial (hg) is installed and an .hg
805      repository exists, the following options apply to mercurial:
806          --git,
807          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
808          --git-blame
809      Use --hg-since not --git-since to control date selection
810  File ".get_maintainer.conf", if it exists in the linux kernel source root
811      directory, can change whatever get_maintainer defaults are desired.
812      Entries in this file can be any command line argument.
813      This file is prepended to any additional command line arguments.
814      Multiple lines and # comments are allowed.
815EOT
816}
817
818sub top_of_kernel_tree {
819    my ($lk_path) = @_;
820
821    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
822	$lk_path .= "/";
823    }
824    if (   (-f "${lk_path}COPYING")
825	&& (-f "${lk_path}CREDITS")
826	&& (-f "${lk_path}Kbuild")
827	&& (-f "${lk_path}MAINTAINERS")
828	&& (-f "${lk_path}Makefile")
829	&& (-f "${lk_path}README")
830	&& (-d "${lk_path}Documentation")
831	&& (-d "${lk_path}arch")
832	&& (-d "${lk_path}include")
833	&& (-d "${lk_path}drivers")
834	&& (-d "${lk_path}fs")
835	&& (-d "${lk_path}init")
836	&& (-d "${lk_path}ipc")
837	&& (-d "${lk_path}kernel")
838	&& (-d "${lk_path}lib")
839	&& (-d "${lk_path}scripts")) {
840	return 1;
841    }
842    return 0;
843}
844
845sub parse_email {
846    my ($formatted_email) = @_;
847
848    my $name = "";
849    my $address = "";
850
851    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
852	$name = $1;
853	$address = $2;
854    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
855	$address = $1;
856    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
857	$address = $1;
858    }
859
860    $name =~ s/^\s+|\s+$//g;
861    $name =~ s/^\"|\"$//g;
862    $address =~ s/^\s+|\s+$//g;
863
864    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
865	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
866	$name = "\"$name\"";
867    }
868
869    return ($name, $address);
870}
871
872sub format_email {
873    my ($name, $address, $usename) = @_;
874
875    my $formatted_email;
876
877    $name =~ s/^\s+|\s+$//g;
878    $name =~ s/^\"|\"$//g;
879    $address =~ s/^\s+|\s+$//g;
880
881    if ($name =~ /[^\w \-]/i) {          ##has "must quote" chars
882	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
883	$name = "\"$name\"";
884    }
885
886    if ($usename) {
887	if ("$name" eq "") {
888	    $formatted_email = "$address";
889	} else {
890	    $formatted_email = "$name <$address>";
891	}
892    } else {
893	$formatted_email = $address;
894    }
895
896    return $formatted_email;
897}
898
899sub find_first_section {
900    my $index = 0;
901
902    while ($index < @typevalue) {
903	my $tv = $typevalue[$index];
904	if (($tv =~ m/^(\C):\s*(.*)/)) {
905	    last;
906	}
907	$index++;
908    }
909
910    return $index;
911}
912
913sub find_starting_index {
914    my ($index) = @_;
915
916    while ($index > 0) {
917	my $tv = $typevalue[$index];
918	if (!($tv =~ m/^(\C):\s*(.*)/)) {
919	    last;
920	}
921	$index--;
922    }
923
924    return $index;
925}
926
927sub find_ending_index {
928    my ($index) = @_;
929
930    while ($index < @typevalue) {
931	my $tv = $typevalue[$index];
932	if (!($tv =~ m/^(\C):\s*(.*)/)) {
933	    last;
934	}
935	$index++;
936    }
937
938    return $index;
939}
940
941sub get_maintainer_role {
942    my ($index) = @_;
943
944    my $i;
945    my $start = find_starting_index($index);
946    my $end = find_ending_index($index);
947
948    my $role = "unknown";
949    my $subsystem = $typevalue[$start];
950    if (length($subsystem) > 20) {
951	$subsystem = substr($subsystem, 0, 17);
952	$subsystem =~ s/\s*$//;
953	$subsystem = $subsystem . "...";
954    }
955
956    for ($i = $start + 1; $i < $end; $i++) {
957	my $tv = $typevalue[$i];
958	if ($tv =~ m/^(\C):\s*(.*)/) {
959	    my $ptype = $1;
960	    my $pvalue = $2;
961	    if ($ptype eq "S") {
962		$role = $pvalue;
963	    }
964	}
965    }
966
967    $role = lc($role);
968    if      ($role eq "supported") {
969	$role = "supporter";
970    } elsif ($role eq "maintained") {
971	$role = "maintainer";
972    } elsif ($role eq "odd fixes") {
973	$role = "odd fixer";
974    } elsif ($role eq "orphan") {
975	$role = "orphan minder";
976    } elsif ($role eq "obsolete") {
977	$role = "obsolete minder";
978    } elsif ($role eq "buried alive in reporters") {
979	$role = "chief penguin";
980    }
981
982    return $role . ":" . $subsystem;
983}
984
985sub get_list_role {
986    my ($index) = @_;
987
988    my $i;
989    my $start = find_starting_index($index);
990    my $end = find_ending_index($index);
991
992    my $subsystem = $typevalue[$start];
993    if (length($subsystem) > 20) {
994	$subsystem = substr($subsystem, 0, 17);
995	$subsystem =~ s/\s*$//;
996	$subsystem = $subsystem . "...";
997    }
998
999    if ($subsystem eq "THE REST") {
1000	$subsystem = "";
1001    }
1002
1003    return $subsystem;
1004}
1005
1006sub add_categories {
1007    my ($index) = @_;
1008
1009    my $i;
1010    my $start = find_starting_index($index);
1011    my $end = find_ending_index($index);
1012
1013    push(@subsystem, $typevalue[$start]);
1014
1015    for ($i = $start + 1; $i < $end; $i++) {
1016	my $tv = $typevalue[$i];
1017	if ($tv =~ m/^(\C):\s*(.*)/) {
1018	    my $ptype = $1;
1019	    my $pvalue = $2;
1020	    if ($ptype eq "L") {
1021		my $list_address = $pvalue;
1022		my $list_additional = "";
1023		my $list_role = get_list_role($i);
1024
1025		if ($list_role ne "") {
1026		    $list_role = ":" . $list_role;
1027		}
1028		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1029		    $list_address = $1;
1030		    $list_additional = $2;
1031		}
1032		if ($list_additional =~ m/subscribers-only/) {
1033		    if ($email_subscriber_list) {
1034			if (!$hash_list_to{lc($list_address)}) {
1035			    $hash_list_to{lc($list_address)} = 1;
1036			    push(@list_to, [$list_address,
1037					    "subscriber list${list_role}"]);
1038			}
1039		    }
1040		} else {
1041		    if ($email_list) {
1042			if (!$hash_list_to{lc($list_address)}) {
1043			    $hash_list_to{lc($list_address)} = 1;
1044			    if ($list_additional =~ m/moderated/) {
1045				push(@list_to, [$list_address,
1046						"moderated list${list_role}"]);
1047			    } else {
1048				push(@list_to, [$list_address,
1049						"open list${list_role}"]);
1050			    }
1051			}
1052		    }
1053		}
1054	    } elsif ($ptype eq "M") {
1055		my ($name, $address) = parse_email($pvalue);
1056		if ($name eq "") {
1057		    if ($i > 0) {
1058			my $tv = $typevalue[$i - 1];
1059			if ($tv =~ m/^(\C):\s*(.*)/) {
1060			    if ($1 eq "P") {
1061				$name = $2;
1062				$pvalue = format_email($name, $address, $email_usename);
1063			    }
1064			}
1065		    }
1066		}
1067		if ($email_maintainer) {
1068		    my $role = get_maintainer_role($i);
1069		    push_email_addresses($pvalue, $role);
1070		}
1071	    } elsif ($ptype eq "R") {
1072		my ($name, $address) = parse_email($pvalue);
1073		if ($name eq "") {
1074		    if ($i > 0) {
1075			my $tv = $typevalue[$i - 1];
1076			if ($tv =~ m/^(\C):\s*(.*)/) {
1077			    if ($1 eq "P") {
1078				$name = $2;
1079				$pvalue = format_email($name, $address, $email_usename);
1080			    }
1081			}
1082		    }
1083		}
1084		if ($email_reviewer) {
1085		    push_email_addresses($pvalue, 'reviewer');
1086		}
1087	    } elsif ($ptype eq "T") {
1088		push(@scm, $pvalue);
1089	    } elsif ($ptype eq "W") {
1090		push(@web, $pvalue);
1091	    } elsif ($ptype eq "S") {
1092		push(@status, $pvalue);
1093	    }
1094	}
1095    }
1096}
1097
1098sub email_inuse {
1099    my ($name, $address) = @_;
1100
1101    return 1 if (($name eq "") && ($address eq ""));
1102    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1103    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1104
1105    return 0;
1106}
1107
1108sub push_email_address {
1109    my ($line, $role) = @_;
1110
1111    my ($name, $address) = parse_email($line);
1112
1113    if ($address eq "") {
1114	return 0;
1115    }
1116
1117    if (!$email_remove_duplicates) {
1118	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1119    } elsif (!email_inuse($name, $address)) {
1120	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1121	$email_hash_name{lc($name)}++ if ($name ne "");
1122	$email_hash_address{lc($address)}++;
1123    }
1124
1125    return 1;
1126}
1127
1128sub push_email_addresses {
1129    my ($address, $role) = @_;
1130
1131    my @address_list = ();
1132
1133    if (rfc822_valid($address)) {
1134	push_email_address($address, $role);
1135    } elsif (@address_list = rfc822_validlist($address)) {
1136	my $array_count = shift(@address_list);
1137	while (my $entry = shift(@address_list)) {
1138	    push_email_address($entry, $role);
1139	}
1140    } else {
1141	if (!push_email_address($address, $role)) {
1142	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1143	}
1144    }
1145}
1146
1147sub add_role {
1148    my ($line, $role) = @_;
1149
1150    my ($name, $address) = parse_email($line);
1151    my $email = format_email($name, $address, $email_usename);
1152
1153    foreach my $entry (@email_to) {
1154	if ($email_remove_duplicates) {
1155	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1156	    if (($name eq $entry_name || $address eq $entry_address)
1157		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1158	    ) {
1159		if ($entry->[1] eq "") {
1160		    $entry->[1] = "$role";
1161		} else {
1162		    $entry->[1] = "$entry->[1],$role";
1163		}
1164	    }
1165	} else {
1166	    if ($email eq $entry->[0]
1167		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1168	    ) {
1169		if ($entry->[1] eq "") {
1170		    $entry->[1] = "$role";
1171		} else {
1172		    $entry->[1] = "$entry->[1],$role";
1173		}
1174	    }
1175	}
1176    }
1177}
1178
1179sub which {
1180    my ($bin) = @_;
1181
1182    foreach my $path (split(/:/, $ENV{PATH})) {
1183	if (-e "$path/$bin") {
1184	    return "$path/$bin";
1185	}
1186    }
1187
1188    return "";
1189}
1190
1191sub which_conf {
1192    my ($conf) = @_;
1193
1194    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1195	if (-e "$path/$conf") {
1196	    return "$path/$conf";
1197	}
1198    }
1199
1200    return "";
1201}
1202
1203sub mailmap_email {
1204    my ($line) = @_;
1205
1206    my ($name, $address) = parse_email($line);
1207    my $email = format_email($name, $address, 1);
1208    my $real_name = $name;
1209    my $real_address = $address;
1210
1211    if (exists $mailmap->{names}->{$email} ||
1212	exists $mailmap->{addresses}->{$email}) {
1213	if (exists $mailmap->{names}->{$email}) {
1214	    $real_name = $mailmap->{names}->{$email};
1215	}
1216	if (exists $mailmap->{addresses}->{$email}) {
1217	    $real_address = $mailmap->{addresses}->{$email};
1218	}
1219    } else {
1220	if (exists $mailmap->{names}->{$address}) {
1221	    $real_name = $mailmap->{names}->{$address};
1222	}
1223	if (exists $mailmap->{addresses}->{$address}) {
1224	    $real_address = $mailmap->{addresses}->{$address};
1225	}
1226    }
1227    return format_email($real_name, $real_address, 1);
1228}
1229
1230sub mailmap {
1231    my (@addresses) = @_;
1232
1233    my @mapped_emails = ();
1234    foreach my $line (@addresses) {
1235	push(@mapped_emails, mailmap_email($line));
1236    }
1237    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1238    return @mapped_emails;
1239}
1240
1241sub merge_by_realname {
1242    my %address_map;
1243    my (@emails) = @_;
1244
1245    foreach my $email (@emails) {
1246	my ($name, $address) = parse_email($email);
1247	if (exists $address_map{$name}) {
1248	    $address = $address_map{$name};
1249	    $email = format_email($name, $address, 1);
1250	} else {
1251	    $address_map{$name} = $address;
1252	}
1253    }
1254}
1255
1256sub git_execute_cmd {
1257    my ($cmd) = @_;
1258    my @lines = ();
1259
1260    my $output = `$cmd`;
1261    $output =~ s/^\s*//gm;
1262    @lines = split("\n", $output);
1263
1264    return @lines;
1265}
1266
1267sub hg_execute_cmd {
1268    my ($cmd) = @_;
1269    my @lines = ();
1270
1271    my $output = `$cmd`;
1272    @lines = split("\n", $output);
1273
1274    return @lines;
1275}
1276
1277sub extract_formatted_signatures {
1278    my (@signature_lines) = @_;
1279
1280    my @type = @signature_lines;
1281
1282    s/\s*(.*):.*/$1/ for (@type);
1283
1284    # cut -f2- -d":"
1285    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1286
1287## Reformat email addresses (with names) to avoid badly written signatures
1288
1289    foreach my $signer (@signature_lines) {
1290	$signer = deduplicate_email($signer);
1291    }
1292
1293    return (\@type, \@signature_lines);
1294}
1295
1296sub vcs_find_signers {
1297    my ($cmd, $file) = @_;
1298    my $commits;
1299    my @lines = ();
1300    my @signatures = ();
1301    my @authors = ();
1302    my @stats = ();
1303
1304    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1305
1306    my $pattern = $VCS_cmds{"commit_pattern"};
1307    my $author_pattern = $VCS_cmds{"author_pattern"};
1308    my $stat_pattern = $VCS_cmds{"stat_pattern"};
1309
1310    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern
1311
1312    $commits = grep(/$pattern/, @lines);	# of commits
1313
1314    @authors = grep(/$author_pattern/, @lines);
1315    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1316    @stats = grep(/$stat_pattern/, @lines);
1317
1318#    print("stats: <@stats>\n");
1319
1320    return (0, \@signatures, \@authors, \@stats) if !@signatures;
1321
1322    save_commits_by_author(@lines) if ($interactive);
1323    save_commits_by_signer(@lines) if ($interactive);
1324
1325    if (!$email_git_penguin_chiefs) {
1326	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1327    }
1328
1329    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
1330    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1331
1332    return ($commits, $signers_ref, $authors_ref, \@stats);
1333}
1334
1335sub vcs_find_author {
1336    my ($cmd) = @_;
1337    my @lines = ();
1338
1339    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1340
1341    if (!$email_git_penguin_chiefs) {
1342	@lines = grep(!/${penguin_chiefs}/i, @lines);
1343    }
1344
1345    return @lines if !@lines;
1346
1347    my @authors = ();
1348    foreach my $line (@lines) {
1349	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1350	    my $author = $1;
1351	    my ($name, $address) = parse_email($author);
1352	    $author = format_email($name, $address, 1);
1353	    push(@authors, $author);
1354	}
1355    }
1356
1357    save_commits_by_author(@lines) if ($interactive);
1358    save_commits_by_signer(@lines) if ($interactive);
1359
1360    return @authors;
1361}
1362
1363sub vcs_save_commits {
1364    my ($cmd) = @_;
1365    my @lines = ();
1366    my @commits = ();
1367
1368    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1369
1370    foreach my $line (@lines) {
1371	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1372	    push(@commits, $1);
1373	}
1374    }
1375
1376    return @commits;
1377}
1378
1379sub vcs_blame {
1380    my ($file) = @_;
1381    my $cmd;
1382    my @commits = ();
1383
1384    return @commits if (!(-f $file));
1385
1386    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1387	my @all_commits = ();
1388
1389	$cmd = $VCS_cmds{"blame_file_cmd"};
1390	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1391	@all_commits = vcs_save_commits($cmd);
1392
1393	foreach my $file_range_diff (@range) {
1394	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1395	    my $diff_file = $1;
1396	    my $diff_start = $2;
1397	    my $diff_length = $3;
1398	    next if ("$file" ne "$diff_file");
1399	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1400		push(@commits, $all_commits[$i]);
1401	    }
1402	}
1403    } elsif (@range) {
1404	foreach my $file_range_diff (@range) {
1405	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1406	    my $diff_file = $1;
1407	    my $diff_start = $2;
1408	    my $diff_length = $3;
1409	    next if ("$file" ne "$diff_file");
1410	    $cmd = $VCS_cmds{"blame_range_cmd"};
1411	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1412	    push(@commits, vcs_save_commits($cmd));
1413	}
1414    } else {
1415	$cmd = $VCS_cmds{"blame_file_cmd"};
1416	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1417	@commits = vcs_save_commits($cmd);
1418    }
1419
1420    foreach my $commit (@commits) {
1421	$commit =~ s/^\^//g;
1422    }
1423
1424    return @commits;
1425}
1426
1427my $printed_novcs = 0;
1428sub vcs_exists {
1429    %VCS_cmds = %VCS_cmds_git;
1430    return 1 if eval $VCS_cmds{"available"};
1431    %VCS_cmds = %VCS_cmds_hg;
1432    return 2 if eval $VCS_cmds{"available"};
1433    %VCS_cmds = ();
1434    if (!$printed_novcs) {
1435	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1436	warn("Using a git repository produces better results.\n");
1437	warn("Try Linus Torvalds' latest git repository using:\n");
1438	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
1439	$printed_novcs = 1;
1440    }
1441    return 0;
1442}
1443
1444sub vcs_is_git {
1445    vcs_exists();
1446    return $vcs_used == 1;
1447}
1448
1449sub vcs_is_hg {
1450    return $vcs_used == 2;
1451}
1452
1453sub interactive_get_maintainers {
1454    my ($list_ref) = @_;
1455    my @list = @$list_ref;
1456
1457    vcs_exists();
1458
1459    my %selected;
1460    my %authored;
1461    my %signed;
1462    my $count = 0;
1463    my $maintained = 0;
1464    foreach my $entry (@list) {
1465	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1466	$selected{$count} = 1;
1467	$authored{$count} = 0;
1468	$signed{$count} = 0;
1469	$count++;
1470    }
1471
1472    #menu loop
1473    my $done = 0;
1474    my $print_options = 0;
1475    my $redraw = 1;
1476    while (!$done) {
1477	$count = 0;
1478	if ($redraw) {
1479	    printf STDERR "\n%1s %2s %-65s",
1480			  "*", "#", "email/list and role:stats";
1481	    if ($email_git ||
1482		($email_git_fallback && !$maintained) ||
1483		$email_git_blame) {
1484		print STDERR "auth sign";
1485	    }
1486	    print STDERR "\n";
1487	    foreach my $entry (@list) {
1488		my $email = $entry->[0];
1489		my $role = $entry->[1];
1490		my $sel = "";
1491		$sel = "*" if ($selected{$count});
1492		my $commit_author = $commit_author_hash{$email};
1493		my $commit_signer = $commit_signer_hash{$email};
1494		my $authored = 0;
1495		my $signed = 0;
1496		$authored++ for (@{$commit_author});
1497		$signed++ for (@{$commit_signer});
1498		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1499		printf STDERR "%4d %4d", $authored, $signed
1500		    if ($authored > 0 || $signed > 0);
1501		printf STDERR "\n     %s\n", $role;
1502		if ($authored{$count}) {
1503		    my $commit_author = $commit_author_hash{$email};
1504		    foreach my $ref (@{$commit_author}) {
1505			print STDERR "     Author: @{$ref}[1]\n";
1506		    }
1507		}
1508		if ($signed{$count}) {
1509		    my $commit_signer = $commit_signer_hash{$email};
1510		    foreach my $ref (@{$commit_signer}) {
1511			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1512		    }
1513		}
1514
1515		$count++;
1516	    }
1517	}
1518	my $date_ref = \$email_git_since;
1519	$date_ref = \$email_hg_since if (vcs_is_hg());
1520	if ($print_options) {
1521	    $print_options = 0;
1522	    if (vcs_exists()) {
1523		print STDERR <<EOT
1524
1525Version Control options:
1526g  use git history      [$email_git]
1527gf use git-fallback     [$email_git_fallback]
1528b  use git blame        [$email_git_blame]
1529bs use blame signatures [$email_git_blame_signatures]
1530c# minimum commits      [$email_git_min_signatures]
1531%# min percent          [$email_git_min_percent]
1532d# history to use       [$$date_ref]
1533x# max maintainers      [$email_git_max_maintainers]
1534t  all signature types  [$email_git_all_signature_types]
1535m  use .mailmap         [$email_use_mailmap]
1536EOT
1537	    }
1538	    print STDERR <<EOT
1539
1540Additional options:
15410  toggle all
1542tm toggle maintainers
1543tg toggle git entries
1544tl toggle open list entries
1545ts toggle subscriber list entries
1546f  emails in file       [$file_emails]
1547k  keywords in file     [$keywords]
1548r  remove duplicates    [$email_remove_duplicates]
1549p# pattern match depth  [$pattern_depth]
1550EOT
1551	}
1552	print STDERR
1553"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1554
1555	my $input = <STDIN>;
1556	chomp($input);
1557
1558	$redraw = 1;
1559	my $rerun = 0;
1560	my @wish = split(/[, ]+/, $input);
1561	foreach my $nr (@wish) {
1562	    $nr = lc($nr);
1563	    my $sel = substr($nr, 0, 1);
1564	    my $str = substr($nr, 1);
1565	    my $val = 0;
1566	    $val = $1 if $str =~ /^(\d+)$/;
1567
1568	    if ($sel eq "y") {
1569		$interactive = 0;
1570		$done = 1;
1571		$output_rolestats = 0;
1572		$output_roles = 0;
1573		last;
1574	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1575		$selected{$nr - 1} = !$selected{$nr - 1};
1576	    } elsif ($sel eq "*" || $sel eq '^') {
1577		my $toggle = 0;
1578		$toggle = 1 if ($sel eq '*');
1579		for (my $i = 0; $i < $count; $i++) {
1580		    $selected{$i} = $toggle;
1581		}
1582	    } elsif ($sel eq "0") {
1583		for (my $i = 0; $i < $count; $i++) {
1584		    $selected{$i} = !$selected{$i};
1585		}
1586	    } elsif ($sel eq "t") {
1587		if (lc($str) eq "m") {
1588		    for (my $i = 0; $i < $count; $i++) {
1589			$selected{$i} = !$selected{$i}
1590			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1591		    }
1592		} elsif (lc($str) eq "g") {
1593		    for (my $i = 0; $i < $count; $i++) {
1594			$selected{$i} = !$selected{$i}
1595			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1596		    }
1597		} elsif (lc($str) eq "l") {
1598		    for (my $i = 0; $i < $count; $i++) {
1599			$selected{$i} = !$selected{$i}
1600			    if ($list[$i]->[1] =~ /^(open list)/i);
1601		    }
1602		} elsif (lc($str) eq "s") {
1603		    for (my $i = 0; $i < $count; $i++) {
1604			$selected{$i} = !$selected{$i}
1605			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1606		    }
1607		}
1608	    } elsif ($sel eq "a") {
1609		if ($val > 0 && $val <= $count) {
1610		    $authored{$val - 1} = !$authored{$val - 1};
1611		} elsif ($str eq '*' || $str eq '^') {
1612		    my $toggle = 0;
1613		    $toggle = 1 if ($str eq '*');
1614		    for (my $i = 0; $i < $count; $i++) {
1615			$authored{$i} = $toggle;
1616		    }
1617		}
1618	    } elsif ($sel eq "s") {
1619		if ($val > 0 && $val <= $count) {
1620		    $signed{$val - 1} = !$signed{$val - 1};
1621		} elsif ($str eq '*' || $str eq '^') {
1622		    my $toggle = 0;
1623		    $toggle = 1 if ($str eq '*');
1624		    for (my $i = 0; $i < $count; $i++) {
1625			$signed{$i} = $toggle;
1626		    }
1627		}
1628	    } elsif ($sel eq "o") {
1629		$print_options = 1;
1630		$redraw = 1;
1631	    } elsif ($sel eq "g") {
1632		if ($str eq "f") {
1633		    bool_invert(\$email_git_fallback);
1634		} else {
1635		    bool_invert(\$email_git);
1636		}
1637		$rerun = 1;
1638	    } elsif ($sel eq "b") {
1639		if ($str eq "s") {
1640		    bool_invert(\$email_git_blame_signatures);
1641		} else {
1642		    bool_invert(\$email_git_blame);
1643		}
1644		$rerun = 1;
1645	    } elsif ($sel eq "c") {
1646		if ($val > 0) {
1647		    $email_git_min_signatures = $val;
1648		    $rerun = 1;
1649		}
1650	    } elsif ($sel eq "x") {
1651		if ($val > 0) {
1652		    $email_git_max_maintainers = $val;
1653		    $rerun = 1;
1654		}
1655	    } elsif ($sel eq "%") {
1656		if ($str ne "" && $val >= 0) {
1657		    $email_git_min_percent = $val;
1658		    $rerun = 1;
1659		}
1660	    } elsif ($sel eq "d") {
1661		if (vcs_is_git()) {
1662		    $email_git_since = $str;
1663		} elsif (vcs_is_hg()) {
1664		    $email_hg_since = $str;
1665		}
1666		$rerun = 1;
1667	    } elsif ($sel eq "t") {
1668		bool_invert(\$email_git_all_signature_types);
1669		$rerun = 1;
1670	    } elsif ($sel eq "f") {
1671		bool_invert(\$file_emails);
1672		$rerun = 1;
1673	    } elsif ($sel eq "r") {
1674		bool_invert(\$email_remove_duplicates);
1675		$rerun = 1;
1676	    } elsif ($sel eq "m") {
1677		bool_invert(\$email_use_mailmap);
1678		read_mailmap();
1679		$rerun = 1;
1680	    } elsif ($sel eq "k") {
1681		bool_invert(\$keywords);
1682		$rerun = 1;
1683	    } elsif ($sel eq "p") {
1684		if ($str ne "" && $val >= 0) {
1685		    $pattern_depth = $val;
1686		    $rerun = 1;
1687		}
1688	    } elsif ($sel eq "h" || $sel eq "?") {
1689		print STDERR <<EOT
1690
1691Interactive mode allows you to select the various maintainers, submitters,
1692commit signers and mailing lists that could be CC'd on a patch.
1693
1694Any *'d entry is selected.
1695
1696If you have git or hg installed, you can choose to summarize the commit
1697history of files in the patch.  Also, each line of the current file can
1698be matched to its commit author and that commits signers with blame.
1699
1700Various knobs exist to control the length of time for active commit
1701tracking, the maximum number of commit authors and signers to add,
1702and such.
1703
1704Enter selections at the prompt until you are satisfied that the selected
1705maintainers are appropriate.  You may enter multiple selections separated
1706by either commas or spaces.
1707
1708EOT
1709	    } else {
1710		print STDERR "invalid option: '$nr'\n";
1711		$redraw = 0;
1712	    }
1713	}
1714	if ($rerun) {
1715	    print STDERR "git-blame can be very slow, please have patience..."
1716		if ($email_git_blame);
1717	    goto &get_maintainers;
1718	}
1719    }
1720
1721    #drop not selected entries
1722    $count = 0;
1723    my @new_emailto = ();
1724    foreach my $entry (@list) {
1725	if ($selected{$count}) {
1726	    push(@new_emailto, $list[$count]);
1727	}
1728	$count++;
1729    }
1730    return @new_emailto;
1731}
1732
1733sub bool_invert {
1734    my ($bool_ref) = @_;
1735
1736    if ($$bool_ref) {
1737	$$bool_ref = 0;
1738    } else {
1739	$$bool_ref = 1;
1740    }
1741}
1742
1743sub deduplicate_email {
1744    my ($email) = @_;
1745
1746    my $matched = 0;
1747    my ($name, $address) = parse_email($email);
1748    $email = format_email($name, $address, 1);
1749    $email = mailmap_email($email);
1750
1751    return $email if (!$email_remove_duplicates);
1752
1753    ($name, $address) = parse_email($email);
1754
1755    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
1756	$name = $deduplicate_name_hash{lc($name)}->[0];
1757	$address = $deduplicate_name_hash{lc($name)}->[1];
1758	$matched = 1;
1759    } elsif ($deduplicate_address_hash{lc($address)}) {
1760	$name = $deduplicate_address_hash{lc($address)}->[0];
1761	$address = $deduplicate_address_hash{lc($address)}->[1];
1762	$matched = 1;
1763    }
1764    if (!$matched) {
1765	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
1766	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
1767    }
1768    $email = format_email($name, $address, 1);
1769    $email = mailmap_email($email);
1770    return $email;
1771}
1772
1773sub save_commits_by_author {
1774    my (@lines) = @_;
1775
1776    my @authors = ();
1777    my @commits = ();
1778    my @subjects = ();
1779
1780    foreach my $line (@lines) {
1781	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1782	    my $author = $1;
1783	    $author = deduplicate_email($author);
1784	    push(@authors, $author);
1785	}
1786	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
1787	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
1788    }
1789
1790    for (my $i = 0; $i < @authors; $i++) {
1791	my $exists = 0;
1792	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
1793	    if (@{$ref}[0] eq $commits[$i] &&
1794		@{$ref}[1] eq $subjects[$i]) {
1795		$exists = 1;
1796		last;
1797	    }
1798	}
1799	if (!$exists) {
1800	    push(@{$commit_author_hash{$authors[$i]}},
1801		 [ ($commits[$i], $subjects[$i]) ]);
1802	}
1803    }
1804}
1805
1806sub save_commits_by_signer {
1807    my (@lines) = @_;
1808
1809    my $commit = "";
1810    my $subject = "";
1811
1812    foreach my $line (@lines) {
1813	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
1814	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
1815	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
1816	    my @signatures = ($line);
1817	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1818	    my @types = @$types_ref;
1819	    my @signers = @$signers_ref;
1820
1821	    my $type = $types[0];
1822	    my $signer = $signers[0];
1823
1824	    $signer = deduplicate_email($signer);
1825
1826	    my $exists = 0;
1827	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
1828		if (@{$ref}[0] eq $commit &&
1829		    @{$ref}[1] eq $subject &&
1830		    @{$ref}[2] eq $type) {
1831		    $exists = 1;
1832		    last;
1833		}
1834	    }
1835	    if (!$exists) {
1836		push(@{$commit_signer_hash{$signer}},
1837		     [ ($commit, $subject, $type) ]);
1838	    }
1839	}
1840    }
1841}
1842
1843sub vcs_assign {
1844    my ($role, $divisor, @lines) = @_;
1845
1846    my %hash;
1847    my $count = 0;
1848
1849    return if (@lines <= 0);
1850
1851    if ($divisor <= 0) {
1852	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
1853	$divisor = 1;
1854    }
1855
1856    @lines = mailmap(@lines);
1857
1858    return if (@lines <= 0);
1859
1860    @lines = sort(@lines);
1861
1862    # uniq -c
1863    $hash{$_}++ for @lines;
1864
1865    # sort -rn
1866    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
1867	my $sign_offs = $hash{$line};
1868	my $percent = $sign_offs * 100 / $divisor;
1869
1870	$percent = 100 if ($percent > 100);
1871	$count++;
1872	last if ($sign_offs < $email_git_min_signatures ||
1873		 $count > $email_git_max_maintainers ||
1874		 $percent < $email_git_min_percent);
1875	push_email_address($line, '');
1876	if ($output_rolestats) {
1877	    my $fmt_percent = sprintf("%.0f", $percent);
1878	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
1879	} else {
1880	    add_role($line, $role);
1881	}
1882    }
1883}
1884
1885sub vcs_file_signoffs {
1886    my ($file) = @_;
1887
1888    my $authors_ref;
1889    my $signers_ref;
1890    my $stats_ref;
1891    my @authors = ();
1892    my @signers = ();
1893    my @stats = ();
1894    my $commits;
1895
1896    $vcs_used = vcs_exists();
1897    return if (!$vcs_used);
1898
1899    my $cmd = $VCS_cmds{"find_signers_cmd"};
1900    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
1901
1902    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
1903
1904    @signers = @{$signers_ref} if defined $signers_ref;
1905    @authors = @{$authors_ref} if defined $authors_ref;
1906    @stats = @{$stats_ref} if defined $stats_ref;
1907
1908#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
1909
1910    foreach my $signer (@signers) {
1911	$signer = deduplicate_email($signer);
1912    }
1913
1914    vcs_assign("commit_signer", $commits, @signers);
1915    vcs_assign("authored", $commits, @authors);
1916    if ($#authors == $#stats) {
1917	my $stat_pattern = $VCS_cmds{"stat_pattern"};
1918	$stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern
1919
1920	my $added = 0;
1921	my $deleted = 0;
1922	for (my $i = 0; $i <= $#stats; $i++) {
1923	    if ($stats[$i] =~ /$stat_pattern/) {
1924		$added += $1;
1925		$deleted += $2;
1926	    }
1927	}
1928	my @tmp_authors = uniq(@authors);
1929	foreach my $author (@tmp_authors) {
1930	    $author = deduplicate_email($author);
1931	}
1932	@tmp_authors = uniq(@tmp_authors);
1933	my @list_added = ();
1934	my @list_deleted = ();
1935	foreach my $author (@tmp_authors) {
1936	    my $auth_added = 0;
1937	    my $auth_deleted = 0;
1938	    for (my $i = 0; $i <= $#stats; $i++) {
1939		if ($author eq deduplicate_email($authors[$i]) &&
1940		    $stats[$i] =~ /$stat_pattern/) {
1941		    $auth_added += $1;
1942		    $auth_deleted += $2;
1943		}
1944	    }
1945	    for (my $i = 0; $i < $auth_added; $i++) {
1946		push(@list_added, $author);
1947	    }
1948	    for (my $i = 0; $i < $auth_deleted; $i++) {
1949		push(@list_deleted, $author);
1950	    }
1951	}
1952	vcs_assign("added_lines", $added, @list_added);
1953	vcs_assign("removed_lines", $deleted, @list_deleted);
1954    }
1955}
1956
1957sub vcs_file_blame {
1958    my ($file) = @_;
1959
1960    my @signers = ();
1961    my @all_commits = ();
1962    my @commits = ();
1963    my $total_commits;
1964    my $total_lines;
1965
1966    $vcs_used = vcs_exists();
1967    return if (!$vcs_used);
1968
1969    @all_commits = vcs_blame($file);
1970    @commits = uniq(@all_commits);
1971    $total_commits = @commits;
1972    $total_lines = @all_commits;
1973
1974    if ($email_git_blame_signatures) {
1975	if (vcs_is_hg()) {
1976	    my $commit_count;
1977	    my $commit_authors_ref;
1978	    my $commit_signers_ref;
1979	    my $stats_ref;
1980	    my @commit_authors = ();
1981	    my @commit_signers = ();
1982	    my $commit = join(" -r ", @commits);
1983	    my $cmd;
1984
1985	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
1986	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1987
1988	    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
1989	    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
1990	    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
1991
1992	    push(@signers, @commit_signers);
1993	} else {
1994	    foreach my $commit (@commits) {
1995		my $commit_count;
1996		my $commit_authors_ref;
1997		my $commit_signers_ref;
1998		my $stats_ref;
1999		my @commit_authors = ();
2000		my @commit_signers = ();
2001		my $cmd;
2002
2003		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
2004		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2005
2006		($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
2007		@commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
2008		@commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
2009
2010		push(@signers, @commit_signers);
2011	    }
2012	}
2013    }
2014
2015    if ($from_filename) {
2016	if ($output_rolestats) {
2017	    my @blame_signers;
2018	    if (vcs_is_hg()) {{		# Double brace for last exit
2019		my $commit_count;
2020		my @commit_signers = ();
2021		@commits = uniq(@commits);
2022		@commits = sort(@commits);
2023		my $commit = join(" -r ", @commits);
2024		my $cmd;
2025
2026		$cmd = $VCS_cmds{"find_commit_author_cmd"};
2027		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2028
2029		my @lines = ();
2030
2031		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
2032
2033		if (!$email_git_penguin_chiefs) {
2034		    @lines = grep(!/${penguin_chiefs}/i, @lines);
2035		}
2036
2037		last if !@lines;
2038
2039		my @authors = ();
2040		foreach my $line (@lines) {
2041		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2042			my $author = $1;
2043			$author = deduplicate_email($author);
2044			push(@authors, $author);
2045		    }
2046		}
2047
2048		save_commits_by_author(@lines) if ($interactive);
2049		save_commits_by_signer(@lines) if ($interactive);
2050
2051		push(@signers, @authors);
2052	    }}
2053	    else {
2054		foreach my $commit (@commits) {
2055		    my $i;
2056		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2057		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2058		    my @author = vcs_find_author($cmd);
2059		    next if !@author;
2060
2061		    my $formatted_author = deduplicate_email($author[0]);
2062
2063		    my $count = grep(/$commit/, @all_commits);
2064		    for ($i = 0; $i < $count ; $i++) {
2065			push(@blame_signers, $formatted_author);
2066		    }
2067		}
2068	    }
2069	    if (@blame_signers) {
2070		vcs_assign("authored lines", $total_lines, @blame_signers);
2071	    }
2072	}
2073	foreach my $signer (@signers) {
2074	    $signer = deduplicate_email($signer);
2075	}
2076	vcs_assign("commits", $total_commits, @signers);
2077    } else {
2078	foreach my $signer (@signers) {
2079	    $signer = deduplicate_email($signer);
2080	}
2081	vcs_assign("modified commits", $total_commits, @signers);
2082    }
2083}
2084
2085sub uniq {
2086    my (@parms) = @_;
2087
2088    my %saw;
2089    @parms = grep(!$saw{$_}++, @parms);
2090    return @parms;
2091}
2092
2093sub sort_and_uniq {
2094    my (@parms) = @_;
2095
2096    my %saw;
2097    @parms = sort @parms;
2098    @parms = grep(!$saw{$_}++, @parms);
2099    return @parms;
2100}
2101
2102sub clean_file_emails {
2103    my (@file_emails) = @_;
2104    my @fmt_emails = ();
2105
2106    foreach my $email (@file_emails) {
2107	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2108	my ($name, $address) = parse_email($email);
2109	if ($name eq '"[,\.]"') {
2110	    $name = "";
2111	}
2112
2113	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
2114	if (@nw > 2) {
2115	    my $first = $nw[@nw - 3];
2116	    my $middle = $nw[@nw - 2];
2117	    my $last = $nw[@nw - 1];
2118
2119	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
2120		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2121		(length($middle) == 1 ||
2122		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2123		$name = "$first $middle $last";
2124	    } else {
2125		$name = "$middle $last";
2126	    }
2127	}
2128
2129	if (substr($name, -1) =~ /[,\.]/) {
2130	    $name = substr($name, 0, length($name) - 1);
2131	} elsif (substr($name, -2) =~ /[,\.]"/) {
2132	    $name = substr($name, 0, length($name) - 2) . '"';
2133	}
2134
2135	if (substr($name, 0, 1) =~ /[,\.]/) {
2136	    $name = substr($name, 1, length($name) - 1);
2137	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
2138	    $name = '"' . substr($name, 2, length($name) - 2);
2139	}
2140
2141	my $fmt_email = format_email($name, $address, $email_usename);
2142	push(@fmt_emails, $fmt_email);
2143    }
2144    return @fmt_emails;
2145}
2146
2147sub merge_email {
2148    my @lines;
2149    my %saw;
2150
2151    for (@_) {
2152	my ($address, $role) = @$_;
2153	if (!$saw{$address}) {
2154	    if ($output_roles) {
2155		push(@lines, "$address ($role)");
2156	    } else {
2157		push(@lines, $address);
2158	    }
2159	    $saw{$address} = 1;
2160	}
2161    }
2162
2163    return @lines;
2164}
2165
2166sub output {
2167    my (@parms) = @_;
2168
2169    if ($output_multiline) {
2170	foreach my $line (@parms) {
2171	    print("${line}\n");
2172	}
2173    } else {
2174	print(join($output_separator, @parms));
2175	print("\n");
2176    }
2177}
2178
2179my $rfc822re;
2180
2181sub make_rfc822re {
2182#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2183#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2184#   This regexp will only work on addresses which have had comments stripped
2185#   and replaced with rfc822_lwsp.
2186
2187    my $specials = '()<>@,;:\\\\".\\[\\]';
2188    my $controls = '\\000-\\037\\177';
2189
2190    my $dtext = "[^\\[\\]\\r\\\\]";
2191    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2192
2193    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2194
2195#   Use zero-width assertion to spot the limit of an atom.  A simple
2196#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2197    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2198    my $word = "(?:$atom|$quoted_string)";
2199    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2200
2201    my $sub_domain = "(?:$atom|$domain_literal)";
2202    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2203
2204    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2205
2206    my $phrase = "$word*";
2207    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2208    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2209    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2210
2211    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2212    my $address = "(?:$mailbox|$group)";
2213
2214    return "$rfc822_lwsp*$address";
2215}
2216
2217sub rfc822_strip_comments {
2218    my $s = shift;
2219#   Recursively remove comments, and replace with a single space.  The simpler
2220#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2221#   chars in atoms, for example.
2222
2223    while ($s =~ s/^((?:[^"\\]|\\.)*
2224                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2225                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2226    return $s;
2227}
2228
2229#   valid: returns true if the parameter is an RFC822 valid address
2230#
2231sub rfc822_valid {
2232    my $s = rfc822_strip_comments(shift);
2233
2234    if (!$rfc822re) {
2235        $rfc822re = make_rfc822re();
2236    }
2237
2238    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2239}
2240
2241#   validlist: In scalar context, returns true if the parameter is an RFC822
2242#              valid list of addresses.
2243#
2244#              In list context, returns an empty list on failure (an invalid
2245#              address was found); otherwise a list whose first element is the
2246#              number of addresses found and whose remaining elements are the
2247#              addresses.  This is needed to disambiguate failure (invalid)
2248#              from success with no addresses found, because an empty string is
2249#              a valid list.
2250
2251sub rfc822_validlist {
2252    my $s = rfc822_strip_comments(shift);
2253
2254    if (!$rfc822re) {
2255        $rfc822re = make_rfc822re();
2256    }
2257    # * null list items are valid according to the RFC
2258    # * the '1' business is to aid in distinguishing failure from no results
2259
2260    my @r;
2261    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2262	$s =~ m/^$rfc822_char*$/) {
2263        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2264            push(@r, $1);
2265        }
2266        return wantarray ? (scalar(@r), @r) : 1;
2267    }
2268    return wantarray ? () : 0;
2269}
2270