xref: /openbmc/u-boot/scripts/get_maintainer.pl (revision ee360cd2)
1#!/usr/bin/perl -w
2# (c) 2007, Joe Perches <joe@perches.com>
3#           created from checkpatch.pl
4#
5# Print selected MAINTAINERS information for
6# the files modified in a patch or for a file
7#
8# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
9#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
10#
11# Licensed under the terms of the GNU GPL License version 2
12
13use strict;
14
15my $P = $0;
16my $V = '0.26';
17
18use Getopt::Long qw(:config no_auto_abbrev);
19
20my $lk_path = "./";
21my $email = 1;
22my $email_usename = 1;
23my $email_maintainer = 1;
24my $email_list = 1;
25my $email_subscriber_list = 0;
26my $email_git_penguin_chiefs = 0;
27my $email_git = 0;
28my $email_git_all_signature_types = 0;
29my $email_git_blame = 0;
30my $email_git_blame_signatures = 1;
31my $email_git_fallback = 1;
32my $email_git_min_signatures = 1;
33my $email_git_max_maintainers = 5;
34my $email_git_min_percent = 5;
35my $email_git_since = "1-year-ago";
36my $email_hg_since = "-365";
37my $interactive = 0;
38my $email_remove_duplicates = 1;
39my $email_use_mailmap = 1;
40my $output_multiline = 1;
41my $output_separator = ", ";
42my $output_roles = 0;
43my $output_rolestats = 1;
44my $scm = 0;
45my $web = 0;
46my $subsystem = 0;
47my $status = 0;
48my $keywords = 1;
49my $sections = 0;
50my $file_emails = 0;
51my $from_filename = 0;
52my $pattern_depth = 0;
53my $version = 0;
54my $help = 0;
55
56my $vcs_used = 0;
57
58my $exit = 0;
59
60my %commit_author_hash;
61my %commit_signer_hash;
62
63my @penguin_chief = ();
64push(@penguin_chief, "Tom Rini:trini\@ti.com");
65
66my @penguin_chief_names = ();
67foreach my $chief (@penguin_chief) {
68    if ($chief =~ m/^(.*):(.*)/) {
69	my $chief_name = $1;
70	my $chief_addr = $2;
71	push(@penguin_chief_names, $chief_name);
72    }
73}
74my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
75
76# Signature types of people who are either
77# 	a) responsible for the code in question, or
78# 	b) familiar enough with it to give relevant feedback
79my @signature_tags = ();
80push(@signature_tags, "Signed-off-by:");
81push(@signature_tags, "Reviewed-by:");
82push(@signature_tags, "Acked-by:");
83
84my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
85
86# rfc822 email address - preloaded methods go here.
87my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
88my $rfc822_char = '[\\000-\\377]';
89
90# VCS command support: class-like functions and strings
91
92my %VCS_cmds;
93
94my %VCS_cmds_git = (
95    "execute_cmd" => \&git_execute_cmd,
96    "available" => '(which("git") ne "") && (-e ".git")',
97    "find_signers_cmd" =>
98	"git log --no-color --follow --since=\$email_git_since " .
99	    '--numstat --no-merges ' .
100	    '--format="GitCommit: %H%n' .
101		      'GitAuthor: %an <%ae>%n' .
102		      'GitDate: %aD%n' .
103		      'GitSubject: %s%n' .
104		      '%b%n"' .
105	    " -- \$file",
106    "find_commit_signers_cmd" =>
107	"git log --no-color " .
108	    '--numstat ' .
109	    '--format="GitCommit: %H%n' .
110		      'GitAuthor: %an <%ae>%n' .
111		      'GitDate: %aD%n' .
112		      'GitSubject: %s%n' .
113		      '%b%n"' .
114	    " -1 \$commit",
115    "find_commit_author_cmd" =>
116	"git log --no-color " .
117	    '--numstat ' .
118	    '--format="GitCommit: %H%n' .
119		      'GitAuthor: %an <%ae>%n' .
120		      'GitDate: %aD%n' .
121		      'GitSubject: %s%n"' .
122	    " -1 \$commit",
123    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
124    "blame_file_cmd" => "git blame -l \$file",
125    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
126    "blame_commit_pattern" => "^([0-9a-f]+) ",
127    "author_pattern" => "^GitAuthor: (.*)",
128    "subject_pattern" => "^GitSubject: (.*)",
129    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
130);
131
132my %VCS_cmds_hg = (
133    "execute_cmd" => \&hg_execute_cmd,
134    "available" => '(which("hg") ne "") && (-d ".hg")',
135    "find_signers_cmd" =>
136	"hg log --date=\$email_hg_since " .
137	    "--template='HgCommit: {node}\\n" .
138	                "HgAuthor: {author}\\n" .
139			"HgSubject: {desc}\\n'" .
140	    " -- \$file",
141    "find_commit_signers_cmd" =>
142	"hg log " .
143	    "--template='HgSubject: {desc}\\n'" .
144	    " -r \$commit",
145    "find_commit_author_cmd" =>
146	"hg log " .
147	    "--template='HgCommit: {node}\\n" .
148		        "HgAuthor: {author}\\n" .
149			"HgSubject: {desc|firstline}\\n'" .
150	    " -r \$commit",
151    "blame_range_cmd" => "",		# not supported
152    "blame_file_cmd" => "hg blame -n \$file",
153    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
154    "blame_commit_pattern" => "^([ 0-9a-f]+):",
155    "author_pattern" => "^HgAuthor: (.*)",
156    "subject_pattern" => "^HgSubject: (.*)",
157    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
158);
159
160my $conf = which_conf(".get_maintainer.conf");
161if (-f $conf) {
162    my @conf_args;
163    open(my $conffile, '<', "$conf")
164	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
165
166    while (<$conffile>) {
167	my $line = $_;
168
169	$line =~ s/\s*\n?$//g;
170	$line =~ s/^\s*//g;
171	$line =~ s/\s+/ /g;
172
173	next if ($line =~ m/^\s*#/);
174	next if ($line =~ m/^\s*$/);
175
176	my @words = split(" ", $line);
177	foreach my $word (@words) {
178	    last if ($word =~ m/^#/);
179	    push (@conf_args, $word);
180	}
181    }
182    close($conffile);
183    unshift(@ARGV, @conf_args) if @conf_args;
184}
185
186if (!GetOptions(
187		'email!' => \$email,
188		'git!' => \$email_git,
189		'git-all-signature-types!' => \$email_git_all_signature_types,
190		'git-blame!' => \$email_git_blame,
191		'git-blame-signatures!' => \$email_git_blame_signatures,
192		'git-fallback!' => \$email_git_fallback,
193		'git-chief-penguins!' => \$email_git_penguin_chiefs,
194		'git-min-signatures=i' => \$email_git_min_signatures,
195		'git-max-maintainers=i' => \$email_git_max_maintainers,
196		'git-min-percent=i' => \$email_git_min_percent,
197		'git-since=s' => \$email_git_since,
198		'hg-since=s' => \$email_hg_since,
199		'i|interactive!' => \$interactive,
200		'remove-duplicates!' => \$email_remove_duplicates,
201		'mailmap!' => \$email_use_mailmap,
202		'm!' => \$email_maintainer,
203		'n!' => \$email_usename,
204		'l!' => \$email_list,
205		's!' => \$email_subscriber_list,
206		'multiline!' => \$output_multiline,
207		'roles!' => \$output_roles,
208		'rolestats!' => \$output_rolestats,
209		'separator=s' => \$output_separator,
210		'subsystem!' => \$subsystem,
211		'status!' => \$status,
212		'scm!' => \$scm,
213		'web!' => \$web,
214		'pattern-depth=i' => \$pattern_depth,
215		'k|keywords!' => \$keywords,
216		'sections!' => \$sections,
217		'fe|file-emails!' => \$file_emails,
218		'f|file' => \$from_filename,
219		'v|version' => \$version,
220		'h|help|usage' => \$help,
221		)) {
222    die "$P: invalid argument - use --help if necessary\n";
223}
224
225if ($help != 0) {
226    usage();
227    exit 0;
228}
229
230if ($version != 0) {
231    print("${P} ${V}\n");
232    exit 0;
233}
234
235if (-t STDIN && !@ARGV) {
236    # We're talking to a terminal, but have no command line arguments.
237    die "$P: missing patchfile or -f file - use --help if necessary\n";
238}
239
240$output_multiline = 0 if ($output_separator ne ", ");
241$output_rolestats = 1 if ($interactive);
242$output_roles = 1 if ($output_rolestats);
243
244if ($sections) {
245    $email = 0;
246    $email_list = 0;
247    $scm = 0;
248    $status = 0;
249    $subsystem = 0;
250    $web = 0;
251    $keywords = 0;
252    $interactive = 0;
253} else {
254    my $selections = $email + $scm + $status + $subsystem + $web;
255    if ($selections == 0) {
256	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
257    }
258}
259
260if ($email &&
261    ($email_maintainer + $email_list + $email_subscriber_list +
262     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
263    die "$P: Please select at least 1 email option\n";
264}
265
266if (!top_of_kernel_tree($lk_path)) {
267    die "$P: The current directory does not appear to be "
268	. "a linux kernel source tree.\n";
269}
270
271## Read MAINTAINERS for type/value pairs
272
273my @typevalue = ();
274my %keyword_hash;
275
276open (my $maint, '<', "${lk_path}MAINTAINERS")
277    or die "$P: Can't open MAINTAINERS: $!\n";
278while (<$maint>) {
279    my $line = $_;
280
281    if ($line =~ m/^(\C):\s*(.*)/) {
282	my $type = $1;
283	my $value = $2;
284
285	##Filename pattern matching
286	if ($type eq "F" || $type eq "X") {
287	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
288	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
289	    $value =~ s/\?/\./g;         ##Convert ? to .
290	    ##if pattern is a directory and it lacks a trailing slash, add one
291	    if ((-d $value)) {
292		$value =~ s@([^/])$@$1/@;
293	    }
294	} elsif ($type eq "K") {
295	    $keyword_hash{@typevalue} = $value;
296	}
297	push(@typevalue, "$type:$value");
298    } elsif (!/^(\s)*$/) {
299	$line =~ s/\n$//g;
300	push(@typevalue, $line);
301    }
302}
303close($maint);
304
305
306#
307# Read mail address map
308#
309
310my $mailmap;
311
312read_mailmap();
313
314sub read_mailmap {
315    $mailmap = {
316	names => {},
317	addresses => {}
318    };
319
320    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
321
322    open(my $mailmap_file, '<', "${lk_path}.mailmap")
323	or warn "$P: Can't open .mailmap: $!\n";
324
325    while (<$mailmap_file>) {
326	s/#.*$//; #strip comments
327	s/^\s+|\s+$//g; #trim
328
329	next if (/^\s*$/); #skip empty lines
330	#entries have one of the following formats:
331	# name1 <mail1>
332	# <mail1> <mail2>
333	# name1 <mail1> <mail2>
334	# name1 <mail1> name2 <mail2>
335	# (see man git-shortlog)
336
337	if (/^([^<]+)<([^>]+)>$/) {
338	    my $real_name = $1;
339	    my $address = $2;
340
341	    $real_name =~ s/\s+$//;
342	    ($real_name, $address) = parse_email("$real_name <$address>");
343	    $mailmap->{names}->{$address} = $real_name;
344
345	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
346	    my $real_address = $1;
347	    my $wrong_address = $2;
348
349	    $mailmap->{addresses}->{$wrong_address} = $real_address;
350
351	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
352	    my $real_name = $1;
353	    my $real_address = $2;
354	    my $wrong_address = $3;
355
356	    $real_name =~ s/\s+$//;
357	    ($real_name, $real_address) =
358		parse_email("$real_name <$real_address>");
359	    $mailmap->{names}->{$wrong_address} = $real_name;
360	    $mailmap->{addresses}->{$wrong_address} = $real_address;
361
362	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
363	    my $real_name = $1;
364	    my $real_address = $2;
365	    my $wrong_name = $3;
366	    my $wrong_address = $4;
367
368	    $real_name =~ s/\s+$//;
369	    ($real_name, $real_address) =
370		parse_email("$real_name <$real_address>");
371
372	    $wrong_name =~ s/\s+$//;
373	    ($wrong_name, $wrong_address) =
374		parse_email("$wrong_name <$wrong_address>");
375
376	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
377	    $mailmap->{names}->{$wrong_email} = $real_name;
378	    $mailmap->{addresses}->{$wrong_email} = $real_address;
379	}
380    }
381    close($mailmap_file);
382}
383
384## use the filenames on the command line or find the filenames in the patchfiles
385
386my @files = ();
387my @range = ();
388my @keyword_tvi = ();
389my @file_emails = ();
390
391if (!@ARGV) {
392    push(@ARGV, "&STDIN");
393}
394
395foreach my $file (@ARGV) {
396    if ($file ne "&STDIN") {
397	##if $file is a directory and it lacks a trailing slash, add one
398	if ((-d $file)) {
399	    $file =~ s@([^/])$@$1/@;
400	} elsif (!(-f $file)) {
401	    die "$P: file '${file}' not found\n";
402	}
403    }
404    if ($from_filename) {
405	push(@files, $file);
406	if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
407	    open(my $f, '<', $file)
408		or die "$P: Can't open $file: $!\n";
409	    my $text = do { local($/) ; <$f> };
410	    close($f);
411	    if ($keywords) {
412		foreach my $line (keys %keyword_hash) {
413		    if ($text =~ m/$keyword_hash{$line}/x) {
414			push(@keyword_tvi, $line);
415		    }
416		}
417	    }
418	    if ($file_emails) {
419		my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
420		push(@file_emails, clean_file_emails(@poss_addr));
421	    }
422	}
423    } else {
424	my $file_cnt = @files;
425	my $lastfile;
426
427	open(my $patch, "< $file")
428	    or die "$P: Can't open $file: $!\n";
429
430	# We can check arbitrary information before the patch
431	# like the commit message, mail headers, etc...
432	# This allows us to match arbitrary keywords against any part
433	# of a git format-patch generated file (subject tags, etc...)
434
435	my $patch_prefix = "";			#Parsing the intro
436
437	while (<$patch>) {
438	    my $patch_line = $_;
439	    if (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
440		my $filename = $1;
441		$filename =~ s@^[^/]*/@@;
442		$filename =~ s@\n@@;
443		$lastfile = $filename;
444		push(@files, $filename);
445		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
446	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
447		if ($email_git_blame) {
448		    push(@range, "$lastfile:$1:$2");
449		}
450	    } elsif ($keywords) {
451		foreach my $line (keys %keyword_hash) {
452		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
453			push(@keyword_tvi, $line);
454		    }
455		}
456	    }
457	}
458	close($patch);
459
460	if ($file_cnt == @files) {
461	    warn "$P: file '${file}' doesn't appear to be a patch.  "
462		. "Add -f to options?\n";
463	}
464	@files = sort_and_uniq(@files);
465    }
466}
467
468@file_emails = uniq(@file_emails);
469
470my %email_hash_name;
471my %email_hash_address;
472my @email_to = ();
473my %hash_list_to;
474my @list_to = ();
475my @scm = ();
476my @web = ();
477my @subsystem = ();
478my @status = ();
479my %deduplicate_name_hash = ();
480my %deduplicate_address_hash = ();
481
482my @maintainers = get_maintainers();
483
484if (@maintainers) {
485    @maintainers = merge_email(@maintainers);
486    output(@maintainers);
487}
488
489if ($scm) {
490    @scm = uniq(@scm);
491    output(@scm);
492}
493
494if ($status) {
495    @status = uniq(@status);
496    output(@status);
497}
498
499if ($subsystem) {
500    @subsystem = uniq(@subsystem);
501    output(@subsystem);
502}
503
504if ($web) {
505    @web = uniq(@web);
506    output(@web);
507}
508
509exit($exit);
510
511sub range_is_maintained {
512    my ($start, $end) = @_;
513
514    for (my $i = $start; $i < $end; $i++) {
515	my $line = $typevalue[$i];
516	if ($line =~ m/^(\C):\s*(.*)/) {
517	    my $type = $1;
518	    my $value = $2;
519	    if ($type eq 'S') {
520		if ($value =~ /(maintain|support)/i) {
521		    return 1;
522		}
523	    }
524	}
525    }
526    return 0;
527}
528
529sub range_has_maintainer {
530    my ($start, $end) = @_;
531
532    for (my $i = $start; $i < $end; $i++) {
533	my $line = $typevalue[$i];
534	if ($line =~ m/^(\C):\s*(.*)/) {
535	    my $type = $1;
536	    my $value = $2;
537	    if ($type eq 'M') {
538		return 1;
539	    }
540	}
541    }
542    return 0;
543}
544
545sub get_maintainers {
546    %email_hash_name = ();
547    %email_hash_address = ();
548    %commit_author_hash = ();
549    %commit_signer_hash = ();
550    @email_to = ();
551    %hash_list_to = ();
552    @list_to = ();
553    @scm = ();
554    @web = ();
555    @subsystem = ();
556    @status = ();
557    %deduplicate_name_hash = ();
558    %deduplicate_address_hash = ();
559    if ($email_git_all_signature_types) {
560	$signature_pattern = "(.+?)[Bb][Yy]:";
561    } else {
562	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
563    }
564
565    # Find responsible parties
566
567    my %exact_pattern_match_hash = ();
568
569    foreach my $file (@files) {
570
571	my %hash;
572	my $tvi = find_first_section();
573	while ($tvi < @typevalue) {
574	    my $start = find_starting_index($tvi);
575	    my $end = find_ending_index($tvi);
576	    my $exclude = 0;
577	    my $i;
578
579	    #Do not match excluded file patterns
580
581	    for ($i = $start; $i < $end; $i++) {
582		my $line = $typevalue[$i];
583		if ($line =~ m/^(\C):\s*(.*)/) {
584		    my $type = $1;
585		    my $value = $2;
586		    if ($type eq 'X') {
587			if (file_match_pattern($file, $value)) {
588			    $exclude = 1;
589			    last;
590			}
591		    }
592		}
593	    }
594
595	    if (!$exclude) {
596		for ($i = $start; $i < $end; $i++) {
597		    my $line = $typevalue[$i];
598		    if ($line =~ m/^(\C):\s*(.*)/) {
599			my $type = $1;
600			my $value = $2;
601			if ($type eq 'F') {
602			    if (file_match_pattern($file, $value)) {
603				my $value_pd = ($value =~ tr@/@@);
604				my $file_pd = ($file  =~ tr@/@@);
605				$value_pd++ if (substr($value,-1,1) ne "/");
606				$value_pd = -1 if ($value =~ /^\.\*/);
607				if ($value_pd >= $file_pd &&
608				    range_is_maintained($start, $end) &&
609				    range_has_maintainer($start, $end)) {
610				    $exact_pattern_match_hash{$file} = 1;
611				}
612				if ($pattern_depth == 0 ||
613				    (($file_pd - $value_pd) < $pattern_depth)) {
614				    $hash{$tvi} = $value_pd;
615				}
616			    }
617			} elsif ($type eq 'N') {
618			    if ($file =~ m/$value/x) {
619				$hash{$tvi} = 0;
620			    }
621			}
622		    }
623		}
624	    }
625	    $tvi = $end + 1;
626	}
627
628	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
629	    add_categories($line);
630	    if ($sections) {
631		my $i;
632		my $start = find_starting_index($line);
633		my $end = find_ending_index($line);
634		for ($i = $start; $i < $end; $i++) {
635		    my $line = $typevalue[$i];
636		    if ($line =~ /^[FX]:/) {		##Restore file patterns
637			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
638			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
639			$line =~ s/\\\./\./g;       	##Convert \. to .
640			$line =~ s/\.\*/\*/g;       	##Convert .* to *
641		    }
642		    $line =~ s/^([A-Z]):/$1:\t/g;
643		    print("$line\n");
644		}
645		print("\n");
646	    }
647	}
648    }
649
650    if ($keywords) {
651	@keyword_tvi = sort_and_uniq(@keyword_tvi);
652	foreach my $line (@keyword_tvi) {
653	    add_categories($line);
654	}
655    }
656
657    foreach my $email (@email_to, @list_to) {
658	$email->[0] = deduplicate_email($email->[0]);
659    }
660
661    foreach my $file (@files) {
662	if ($email &&
663	    ($email_git || ($email_git_fallback &&
664			    !$exact_pattern_match_hash{$file}))) {
665	    vcs_file_signoffs($file);
666	}
667	if ($email && $email_git_blame) {
668	    vcs_file_blame($file);
669	}
670    }
671
672    if ($email) {
673	foreach my $chief (@penguin_chief) {
674	    if ($chief =~ m/^(.*):(.*)/) {
675		my $email_address;
676
677		$email_address = format_email($1, $2, $email_usename);
678		if ($email_git_penguin_chiefs) {
679		    push(@email_to, [$email_address, 'chief penguin']);
680		} else {
681		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
682		}
683	    }
684	}
685
686	foreach my $email (@file_emails) {
687	    my ($name, $address) = parse_email($email);
688
689	    my $tmp_email = format_email($name, $address, $email_usename);
690	    push_email_address($tmp_email, '');
691	    add_role($tmp_email, 'in file');
692	}
693    }
694
695    my @to = ();
696    if ($email || $email_list) {
697	if ($email) {
698	    @to = (@to, @email_to);
699	}
700	if ($email_list) {
701	    @to = (@to, @list_to);
702	}
703    }
704
705    if ($interactive) {
706	@to = interactive_get_maintainers(\@to);
707    }
708
709    return @to;
710}
711
712sub file_match_pattern {
713    my ($file, $pattern) = @_;
714    if (substr($pattern, -1) eq "/") {
715	if ($file =~ m@^$pattern@) {
716	    return 1;
717	}
718    } else {
719	if ($file =~ m@^$pattern@) {
720	    my $s1 = ($file =~ tr@/@@);
721	    my $s2 = ($pattern =~ tr@/@@);
722	    if ($s1 == $s2) {
723		return 1;
724	    }
725	}
726    }
727    return 0;
728}
729
730sub usage {
731    print <<EOT;
732usage: $P [options] patchfile
733       $P [options] -f file|directory
734version: $V
735
736MAINTAINER field selection options:
737  --email => print email address(es) if any
738    --git => include recent git \*-by: signers
739    --git-all-signature-types => include signers regardless of signature type
740        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
741    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
742    --git-chief-penguins => include ${penguin_chiefs}
743    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
744    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
745    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
746    --git-blame => use git blame to find modified commits for patch or file
747    --git-since => git history to use (default: $email_git_since)
748    --hg-since => hg history to use (default: $email_hg_since)
749    --interactive => display a menu (mostly useful if used with the --git option)
750    --m => include maintainer(s) if any
751    --n => include name 'Full Name <addr\@domain.tld>'
752    --l => include list(s) if any
753    --s => include subscriber only list(s) if any
754    --remove-duplicates => minimize duplicate email names/addresses
755    --roles => show roles (status:subsystem, git-signer, list, etc...)
756    --rolestats => show roles and statistics (commits/total_commits, %)
757    --file-emails => add email addresses found in -f file (default: 0 (off))
758  --scm => print SCM tree(s) if any
759  --status => print status if any
760  --subsystem => print subsystem name if any
761  --web => print website(s) if any
762
763Output type options:
764  --separator [, ] => separator for multiple entries on 1 line
765    using --separator also sets --nomultiline if --separator is not [, ]
766  --multiline => print 1 entry per line
767
768Other options:
769  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
770  --keywords => scan patch for keywords (default: $keywords)
771  --sections => print all of the subsystem sections with pattern matches
772  --mailmap => use .mailmap file (default: $email_use_mailmap)
773  --version => show version
774  --help => show this help information
775
776Default options:
777  [--email --nogit --git-fallback --m --n --l --multiline -pattern-depth=0
778   --remove-duplicates --rolestats]
779
780Notes:
781  Using "-f directory" may give unexpected results:
782      Used with "--git", git signators for _all_ files in and below
783          directory are examined as git recurses directories.
784          Any specified X: (exclude) pattern matches are _not_ ignored.
785      Used with "--nogit", directory is used as a pattern match,
786          no individual file within the directory or subdirectory
787          is matched.
788      Used with "--git-blame", does not iterate all files in directory
789  Using "--git-blame" is slow and may add old committers and authors
790      that are no longer active maintainers to the output.
791  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
792      other automated tools that expect only ["name"] <email address>
793      may not work because of additional output after <email address>.
794  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
795      not the percentage of the entire file authored.  # of commits is
796      not a good measure of amount of code authored.  1 major commit may
797      contain a thousand lines, 5 trivial commits may modify a single line.
798  If git is not installed, but mercurial (hg) is installed and an .hg
799      repository exists, the following options apply to mercurial:
800          --git,
801          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
802          --git-blame
803      Use --hg-since not --git-since to control date selection
804  File ".get_maintainer.conf", if it exists in the linux kernel source root
805      directory, can change whatever get_maintainer defaults are desired.
806      Entries in this file can be any command line argument.
807      This file is prepended to any additional command line arguments.
808      Multiple lines and # comments are allowed.
809EOT
810}
811
812sub top_of_kernel_tree {
813    my ($lk_path) = @_;
814
815    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
816	$lk_path .= "/";
817    }
818    if (   (-f "${lk_path}CREDITS")
819	&& (-f "${lk_path}Kbuild")
820	&& (-f "${lk_path}MAINTAINERS")
821	&& (-f "${lk_path}Makefile")
822	&& (-f "${lk_path}README")
823	&& (-d "${lk_path}arch")
824	&& (-d "${lk_path}board")
825	&& (-d "${lk_path}common")
826	&& (-d "${lk_path}doc")
827	&& (-d "${lk_path}drivers")
828	&& (-d "${lk_path}dts")
829	&& (-d "${lk_path}fs")
830	&& (-d "${lk_path}lib")
831	&& (-d "${lk_path}include")
832	&& (-d "${lk_path}net")
833	&& (-d "${lk_path}post")
834	&& (-d "${lk_path}scripts")
835	&& (-d "${lk_path}test")
836	&& (-d "${lk_path}tools")) {
837	return 1;
838    }
839    return 0;
840}
841
842sub parse_email {
843    my ($formatted_email) = @_;
844
845    my $name = "";
846    my $address = "";
847
848    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
849	$name = $1;
850	$address = $2;
851    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
852	$address = $1;
853    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
854	$address = $1;
855    }
856
857    $name =~ s/^\s+|\s+$//g;
858    $name =~ s/^\"|\"$//g;
859    $address =~ s/^\s+|\s+$//g;
860
861    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
862	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
863	$name = "\"$name\"";
864    }
865
866    return ($name, $address);
867}
868
869sub format_email {
870    my ($name, $address, $usename) = @_;
871
872    my $formatted_email;
873
874    $name =~ s/^\s+|\s+$//g;
875    $name =~ s/^\"|\"$//g;
876    $address =~ s/^\s+|\s+$//g;
877
878    if ($name =~ /[^\w \-]/i) {          ##has "must quote" chars
879	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
880	$name = "\"$name\"";
881    }
882
883    if ($usename) {
884	if ("$name" eq "") {
885	    $formatted_email = "$address";
886	} else {
887	    $formatted_email = "$name <$address>";
888	}
889    } else {
890	$formatted_email = $address;
891    }
892
893    return $formatted_email;
894}
895
896sub find_first_section {
897    my $index = 0;
898
899    while ($index < @typevalue) {
900	my $tv = $typevalue[$index];
901	if (($tv =~ m/^(\C):\s*(.*)/)) {
902	    last;
903	}
904	$index++;
905    }
906
907    return $index;
908}
909
910sub find_starting_index {
911    my ($index) = @_;
912
913    while ($index > 0) {
914	my $tv = $typevalue[$index];
915	if (!($tv =~ m/^(\C):\s*(.*)/)) {
916	    last;
917	}
918	$index--;
919    }
920
921    return $index;
922}
923
924sub find_ending_index {
925    my ($index) = @_;
926
927    while ($index < @typevalue) {
928	my $tv = $typevalue[$index];
929	if (!($tv =~ m/^(\C):\s*(.*)/)) {
930	    last;
931	}
932	$index++;
933    }
934
935    return $index;
936}
937
938sub get_maintainer_role {
939    my ($index) = @_;
940
941    my $i;
942    my $start = find_starting_index($index);
943    my $end = find_ending_index($index);
944
945    my $role = "unknown";
946    my $subsystem = $typevalue[$start];
947    if (length($subsystem) > 20) {
948	$subsystem = substr($subsystem, 0, 17);
949	$subsystem =~ s/\s*$//;
950	$subsystem = $subsystem . "...";
951    }
952
953    for ($i = $start + 1; $i < $end; $i++) {
954	my $tv = $typevalue[$i];
955	if ($tv =~ m/^(\C):\s*(.*)/) {
956	    my $ptype = $1;
957	    my $pvalue = $2;
958	    if ($ptype eq "S") {
959		$role = $pvalue;
960	    }
961	}
962    }
963
964    $role = lc($role);
965    if      ($role eq "supported") {
966	$role = "supporter";
967    } elsif ($role eq "maintained") {
968	$role = "maintainer";
969    } elsif ($role eq "odd fixes") {
970	$role = "odd fixer";
971    } elsif ($role eq "orphan") {
972	$role = "orphan minder";
973    } elsif ($role eq "obsolete") {
974	$role = "obsolete minder";
975    } elsif ($role eq "buried alive in reporters") {
976	$role = "chief penguin";
977    }
978
979    return $role . ":" . $subsystem;
980}
981
982sub get_list_role {
983    my ($index) = @_;
984
985    my $i;
986    my $start = find_starting_index($index);
987    my $end = find_ending_index($index);
988
989    my $subsystem = $typevalue[$start];
990    if (length($subsystem) > 20) {
991	$subsystem = substr($subsystem, 0, 17);
992	$subsystem =~ s/\s*$//;
993	$subsystem = $subsystem . "...";
994    }
995
996    if ($subsystem eq "THE REST") {
997	$subsystem = "";
998    }
999
1000    return $subsystem;
1001}
1002
1003sub add_categories {
1004    my ($index) = @_;
1005
1006    my $i;
1007    my $start = find_starting_index($index);
1008    my $end = find_ending_index($index);
1009
1010    push(@subsystem, $typevalue[$start]);
1011
1012    for ($i = $start + 1; $i < $end; $i++) {
1013	my $tv = $typevalue[$i];
1014	if ($tv =~ m/^(\C):\s*(.*)/) {
1015	    my $ptype = $1;
1016	    my $pvalue = $2;
1017	    if ($ptype eq "L") {
1018		my $list_address = $pvalue;
1019		my $list_additional = "";
1020		my $list_role = get_list_role($i);
1021
1022		if ($list_role ne "") {
1023		    $list_role = ":" . $list_role;
1024		}
1025		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1026		    $list_address = $1;
1027		    $list_additional = $2;
1028		}
1029		if ($list_additional =~ m/subscribers-only/) {
1030		    if ($email_subscriber_list) {
1031			if (!$hash_list_to{lc($list_address)}) {
1032			    $hash_list_to{lc($list_address)} = 1;
1033			    push(@list_to, [$list_address,
1034					    "subscriber list${list_role}"]);
1035			}
1036		    }
1037		} else {
1038		    if ($email_list) {
1039			if (!$hash_list_to{lc($list_address)}) {
1040			    $hash_list_to{lc($list_address)} = 1;
1041			    if ($list_additional =~ m/moderated/) {
1042				push(@list_to, [$list_address,
1043						"moderated list${list_role}"]);
1044			    } else {
1045				push(@list_to, [$list_address,
1046						"open list${list_role}"]);
1047			    }
1048			}
1049		    }
1050		}
1051	    } elsif ($ptype eq "M") {
1052		my ($name, $address) = parse_email($pvalue);
1053		if ($name eq "") {
1054		    if ($i > 0) {
1055			my $tv = $typevalue[$i - 1];
1056			if ($tv =~ m/^(\C):\s*(.*)/) {
1057			    if ($1 eq "P") {
1058				$name = $2;
1059				$pvalue = format_email($name, $address, $email_usename);
1060			    }
1061			}
1062		    }
1063		}
1064		if ($email_maintainer) {
1065		    my $role = get_maintainer_role($i);
1066		    push_email_addresses($pvalue, $role);
1067		}
1068	    } elsif ($ptype eq "T") {
1069		push(@scm, $pvalue);
1070	    } elsif ($ptype eq "W") {
1071		push(@web, $pvalue);
1072	    } elsif ($ptype eq "S") {
1073		push(@status, $pvalue);
1074	    }
1075	}
1076    }
1077}
1078
1079sub email_inuse {
1080    my ($name, $address) = @_;
1081
1082    return 1 if (($name eq "") && ($address eq ""));
1083    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1084    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1085
1086    return 0;
1087}
1088
1089sub push_email_address {
1090    my ($line, $role) = @_;
1091
1092    my ($name, $address) = parse_email($line);
1093
1094    if ($address eq "") {
1095	return 0;
1096    }
1097
1098    if (!$email_remove_duplicates) {
1099	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1100    } elsif (!email_inuse($name, $address)) {
1101	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1102	$email_hash_name{lc($name)}++ if ($name ne "");
1103	$email_hash_address{lc($address)}++;
1104    }
1105
1106    return 1;
1107}
1108
1109sub push_email_addresses {
1110    my ($address, $role) = @_;
1111
1112    my @address_list = ();
1113
1114    if (rfc822_valid($address)) {
1115	push_email_address($address, $role);
1116    } elsif (@address_list = rfc822_validlist($address)) {
1117	my $array_count = shift(@address_list);
1118	while (my $entry = shift(@address_list)) {
1119	    push_email_address($entry, $role);
1120	}
1121    } else {
1122	if (!push_email_address($address, $role)) {
1123	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1124	}
1125    }
1126}
1127
1128sub add_role {
1129    my ($line, $role) = @_;
1130
1131    my ($name, $address) = parse_email($line);
1132    my $email = format_email($name, $address, $email_usename);
1133
1134    foreach my $entry (@email_to) {
1135	if ($email_remove_duplicates) {
1136	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1137	    if (($name eq $entry_name || $address eq $entry_address)
1138		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1139	    ) {
1140		if ($entry->[1] eq "") {
1141		    $entry->[1] = "$role";
1142		} else {
1143		    $entry->[1] = "$entry->[1],$role";
1144		}
1145	    }
1146	} else {
1147	    if ($email eq $entry->[0]
1148		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1149	    ) {
1150		if ($entry->[1] eq "") {
1151		    $entry->[1] = "$role";
1152		} else {
1153		    $entry->[1] = "$entry->[1],$role";
1154		}
1155	    }
1156	}
1157    }
1158}
1159
1160sub which {
1161    my ($bin) = @_;
1162
1163    foreach my $path (split(/:/, $ENV{PATH})) {
1164	if (-e "$path/$bin") {
1165	    return "$path/$bin";
1166	}
1167    }
1168
1169    return "";
1170}
1171
1172sub which_conf {
1173    my ($conf) = @_;
1174
1175    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1176	if (-e "$path/$conf") {
1177	    return "$path/$conf";
1178	}
1179    }
1180
1181    return "";
1182}
1183
1184sub mailmap_email {
1185    my ($line) = @_;
1186
1187    my ($name, $address) = parse_email($line);
1188    my $email = format_email($name, $address, 1);
1189    my $real_name = $name;
1190    my $real_address = $address;
1191
1192    if (exists $mailmap->{names}->{$email} ||
1193	exists $mailmap->{addresses}->{$email}) {
1194	if (exists $mailmap->{names}->{$email}) {
1195	    $real_name = $mailmap->{names}->{$email};
1196	}
1197	if (exists $mailmap->{addresses}->{$email}) {
1198	    $real_address = $mailmap->{addresses}->{$email};
1199	}
1200    } else {
1201	if (exists $mailmap->{names}->{$address}) {
1202	    $real_name = $mailmap->{names}->{$address};
1203	}
1204	if (exists $mailmap->{addresses}->{$address}) {
1205	    $real_address = $mailmap->{addresses}->{$address};
1206	}
1207    }
1208    return format_email($real_name, $real_address, 1);
1209}
1210
1211sub mailmap {
1212    my (@addresses) = @_;
1213
1214    my @mapped_emails = ();
1215    foreach my $line (@addresses) {
1216	push(@mapped_emails, mailmap_email($line));
1217    }
1218    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1219    return @mapped_emails;
1220}
1221
1222sub merge_by_realname {
1223    my %address_map;
1224    my (@emails) = @_;
1225
1226    foreach my $email (@emails) {
1227	my ($name, $address) = parse_email($email);
1228	if (exists $address_map{$name}) {
1229	    $address = $address_map{$name};
1230	    $email = format_email($name, $address, 1);
1231	} else {
1232	    $address_map{$name} = $address;
1233	}
1234    }
1235}
1236
1237sub git_execute_cmd {
1238    my ($cmd) = @_;
1239    my @lines = ();
1240
1241    my $output = `$cmd`;
1242    $output =~ s/^\s*//gm;
1243    @lines = split("\n", $output);
1244
1245    return @lines;
1246}
1247
1248sub hg_execute_cmd {
1249    my ($cmd) = @_;
1250    my @lines = ();
1251
1252    my $output = `$cmd`;
1253    @lines = split("\n", $output);
1254
1255    return @lines;
1256}
1257
1258sub extract_formatted_signatures {
1259    my (@signature_lines) = @_;
1260
1261    my @type = @signature_lines;
1262
1263    s/\s*(.*):.*/$1/ for (@type);
1264
1265    # cut -f2- -d":"
1266    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1267
1268## Reformat email addresses (with names) to avoid badly written signatures
1269
1270    foreach my $signer (@signature_lines) {
1271	$signer = deduplicate_email($signer);
1272    }
1273
1274    return (\@type, \@signature_lines);
1275}
1276
1277sub vcs_find_signers {
1278    my ($cmd, $file) = @_;
1279    my $commits;
1280    my @lines = ();
1281    my @signatures = ();
1282    my @authors = ();
1283    my @stats = ();
1284
1285    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1286
1287    my $pattern = $VCS_cmds{"commit_pattern"};
1288    my $author_pattern = $VCS_cmds{"author_pattern"};
1289    my $stat_pattern = $VCS_cmds{"stat_pattern"};
1290
1291    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern
1292
1293    $commits = grep(/$pattern/, @lines);	# of commits
1294
1295    @authors = grep(/$author_pattern/, @lines);
1296    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1297    @stats = grep(/$stat_pattern/, @lines);
1298
1299#    print("stats: <@stats>\n");
1300
1301    return (0, \@signatures, \@authors, \@stats) if !@signatures;
1302
1303    save_commits_by_author(@lines) if ($interactive);
1304    save_commits_by_signer(@lines) if ($interactive);
1305
1306    if (!$email_git_penguin_chiefs) {
1307	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1308    }
1309
1310    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
1311    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1312
1313    return ($commits, $signers_ref, $authors_ref, \@stats);
1314}
1315
1316sub vcs_find_author {
1317    my ($cmd) = @_;
1318    my @lines = ();
1319
1320    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1321
1322    if (!$email_git_penguin_chiefs) {
1323	@lines = grep(!/${penguin_chiefs}/i, @lines);
1324    }
1325
1326    return @lines if !@lines;
1327
1328    my @authors = ();
1329    foreach my $line (@lines) {
1330	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1331	    my $author = $1;
1332	    my ($name, $address) = parse_email($author);
1333	    $author = format_email($name, $address, 1);
1334	    push(@authors, $author);
1335	}
1336    }
1337
1338    save_commits_by_author(@lines) if ($interactive);
1339    save_commits_by_signer(@lines) if ($interactive);
1340
1341    return @authors;
1342}
1343
1344sub vcs_save_commits {
1345    my ($cmd) = @_;
1346    my @lines = ();
1347    my @commits = ();
1348
1349    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1350
1351    foreach my $line (@lines) {
1352	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1353	    push(@commits, $1);
1354	}
1355    }
1356
1357    return @commits;
1358}
1359
1360sub vcs_blame {
1361    my ($file) = @_;
1362    my $cmd;
1363    my @commits = ();
1364
1365    return @commits if (!(-f $file));
1366
1367    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1368	my @all_commits = ();
1369
1370	$cmd = $VCS_cmds{"blame_file_cmd"};
1371	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1372	@all_commits = vcs_save_commits($cmd);
1373
1374	foreach my $file_range_diff (@range) {
1375	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1376	    my $diff_file = $1;
1377	    my $diff_start = $2;
1378	    my $diff_length = $3;
1379	    next if ("$file" ne "$diff_file");
1380	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1381		push(@commits, $all_commits[$i]);
1382	    }
1383	}
1384    } elsif (@range) {
1385	foreach my $file_range_diff (@range) {
1386	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1387	    my $diff_file = $1;
1388	    my $diff_start = $2;
1389	    my $diff_length = $3;
1390	    next if ("$file" ne "$diff_file");
1391	    $cmd = $VCS_cmds{"blame_range_cmd"};
1392	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1393	    push(@commits, vcs_save_commits($cmd));
1394	}
1395    } else {
1396	$cmd = $VCS_cmds{"blame_file_cmd"};
1397	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1398	@commits = vcs_save_commits($cmd);
1399    }
1400
1401    foreach my $commit (@commits) {
1402	$commit =~ s/^\^//g;
1403    }
1404
1405    return @commits;
1406}
1407
1408my $printed_novcs = 0;
1409sub vcs_exists {
1410    %VCS_cmds = %VCS_cmds_git;
1411    return 1 if eval $VCS_cmds{"available"};
1412    %VCS_cmds = %VCS_cmds_hg;
1413    return 2 if eval $VCS_cmds{"available"};
1414    %VCS_cmds = ();
1415    if (!$printed_novcs) {
1416	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1417	warn("Using a git repository produces better results.\n");
1418	warn("Try Linus Torvalds' latest git repository using:\n");
1419	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
1420	$printed_novcs = 1;
1421    }
1422    return 0;
1423}
1424
1425sub vcs_is_git {
1426    vcs_exists();
1427    return $vcs_used == 1;
1428}
1429
1430sub vcs_is_hg {
1431    return $vcs_used == 2;
1432}
1433
1434sub interactive_get_maintainers {
1435    my ($list_ref) = @_;
1436    my @list = @$list_ref;
1437
1438    vcs_exists();
1439
1440    my %selected;
1441    my %authored;
1442    my %signed;
1443    my $count = 0;
1444    my $maintained = 0;
1445    foreach my $entry (@list) {
1446	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1447	$selected{$count} = 1;
1448	$authored{$count} = 0;
1449	$signed{$count} = 0;
1450	$count++;
1451    }
1452
1453    #menu loop
1454    my $done = 0;
1455    my $print_options = 0;
1456    my $redraw = 1;
1457    while (!$done) {
1458	$count = 0;
1459	if ($redraw) {
1460	    printf STDERR "\n%1s %2s %-65s",
1461			  "*", "#", "email/list and role:stats";
1462	    if ($email_git ||
1463		($email_git_fallback && !$maintained) ||
1464		$email_git_blame) {
1465		print STDERR "auth sign";
1466	    }
1467	    print STDERR "\n";
1468	    foreach my $entry (@list) {
1469		my $email = $entry->[0];
1470		my $role = $entry->[1];
1471		my $sel = "";
1472		$sel = "*" if ($selected{$count});
1473		my $commit_author = $commit_author_hash{$email};
1474		my $commit_signer = $commit_signer_hash{$email};
1475		my $authored = 0;
1476		my $signed = 0;
1477		$authored++ for (@{$commit_author});
1478		$signed++ for (@{$commit_signer});
1479		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1480		printf STDERR "%4d %4d", $authored, $signed
1481		    if ($authored > 0 || $signed > 0);
1482		printf STDERR "\n     %s\n", $role;
1483		if ($authored{$count}) {
1484		    my $commit_author = $commit_author_hash{$email};
1485		    foreach my $ref (@{$commit_author}) {
1486			print STDERR "     Author: @{$ref}[1]\n";
1487		    }
1488		}
1489		if ($signed{$count}) {
1490		    my $commit_signer = $commit_signer_hash{$email};
1491		    foreach my $ref (@{$commit_signer}) {
1492			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1493		    }
1494		}
1495
1496		$count++;
1497	    }
1498	}
1499	my $date_ref = \$email_git_since;
1500	$date_ref = \$email_hg_since if (vcs_is_hg());
1501	if ($print_options) {
1502	    $print_options = 0;
1503	    if (vcs_exists()) {
1504		print STDERR <<EOT
1505
1506Version Control options:
1507g  use git history      [$email_git]
1508gf use git-fallback     [$email_git_fallback]
1509b  use git blame        [$email_git_blame]
1510bs use blame signatures [$email_git_blame_signatures]
1511c# minimum commits      [$email_git_min_signatures]
1512%# min percent          [$email_git_min_percent]
1513d# history to use       [$$date_ref]
1514x# max maintainers      [$email_git_max_maintainers]
1515t  all signature types  [$email_git_all_signature_types]
1516m  use .mailmap         [$email_use_mailmap]
1517EOT
1518	    }
1519	    print STDERR <<EOT
1520
1521Additional options:
15220  toggle all
1523tm toggle maintainers
1524tg toggle git entries
1525tl toggle open list entries
1526ts toggle subscriber list entries
1527f  emails in file       [$file_emails]
1528k  keywords in file     [$keywords]
1529r  remove duplicates    [$email_remove_duplicates]
1530p# pattern match depth  [$pattern_depth]
1531EOT
1532	}
1533	print STDERR
1534"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1535
1536	my $input = <STDIN>;
1537	chomp($input);
1538
1539	$redraw = 1;
1540	my $rerun = 0;
1541	my @wish = split(/[, ]+/, $input);
1542	foreach my $nr (@wish) {
1543	    $nr = lc($nr);
1544	    my $sel = substr($nr, 0, 1);
1545	    my $str = substr($nr, 1);
1546	    my $val = 0;
1547	    $val = $1 if $str =~ /^(\d+)$/;
1548
1549	    if ($sel eq "y") {
1550		$interactive = 0;
1551		$done = 1;
1552		$output_rolestats = 0;
1553		$output_roles = 0;
1554		last;
1555	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1556		$selected{$nr - 1} = !$selected{$nr - 1};
1557	    } elsif ($sel eq "*" || $sel eq '^') {
1558		my $toggle = 0;
1559		$toggle = 1 if ($sel eq '*');
1560		for (my $i = 0; $i < $count; $i++) {
1561		    $selected{$i} = $toggle;
1562		}
1563	    } elsif ($sel eq "0") {
1564		for (my $i = 0; $i < $count; $i++) {
1565		    $selected{$i} = !$selected{$i};
1566		}
1567	    } elsif ($sel eq "t") {
1568		if (lc($str) eq "m") {
1569		    for (my $i = 0; $i < $count; $i++) {
1570			$selected{$i} = !$selected{$i}
1571			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1572		    }
1573		} elsif (lc($str) eq "g") {
1574		    for (my $i = 0; $i < $count; $i++) {
1575			$selected{$i} = !$selected{$i}
1576			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1577		    }
1578		} elsif (lc($str) eq "l") {
1579		    for (my $i = 0; $i < $count; $i++) {
1580			$selected{$i} = !$selected{$i}
1581			    if ($list[$i]->[1] =~ /^(open list)/i);
1582		    }
1583		} elsif (lc($str) eq "s") {
1584		    for (my $i = 0; $i < $count; $i++) {
1585			$selected{$i} = !$selected{$i}
1586			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1587		    }
1588		}
1589	    } elsif ($sel eq "a") {
1590		if ($val > 0 && $val <= $count) {
1591		    $authored{$val - 1} = !$authored{$val - 1};
1592		} elsif ($str eq '*' || $str eq '^') {
1593		    my $toggle = 0;
1594		    $toggle = 1 if ($str eq '*');
1595		    for (my $i = 0; $i < $count; $i++) {
1596			$authored{$i} = $toggle;
1597		    }
1598		}
1599	    } elsif ($sel eq "s") {
1600		if ($val > 0 && $val <= $count) {
1601		    $signed{$val - 1} = !$signed{$val - 1};
1602		} elsif ($str eq '*' || $str eq '^') {
1603		    my $toggle = 0;
1604		    $toggle = 1 if ($str eq '*');
1605		    for (my $i = 0; $i < $count; $i++) {
1606			$signed{$i} = $toggle;
1607		    }
1608		}
1609	    } elsif ($sel eq "o") {
1610		$print_options = 1;
1611		$redraw = 1;
1612	    } elsif ($sel eq "g") {
1613		if ($str eq "f") {
1614		    bool_invert(\$email_git_fallback);
1615		} else {
1616		    bool_invert(\$email_git);
1617		}
1618		$rerun = 1;
1619	    } elsif ($sel eq "b") {
1620		if ($str eq "s") {
1621		    bool_invert(\$email_git_blame_signatures);
1622		} else {
1623		    bool_invert(\$email_git_blame);
1624		}
1625		$rerun = 1;
1626	    } elsif ($sel eq "c") {
1627		if ($val > 0) {
1628		    $email_git_min_signatures = $val;
1629		    $rerun = 1;
1630		}
1631	    } elsif ($sel eq "x") {
1632		if ($val > 0) {
1633		    $email_git_max_maintainers = $val;
1634		    $rerun = 1;
1635		}
1636	    } elsif ($sel eq "%") {
1637		if ($str ne "" && $val >= 0) {
1638		    $email_git_min_percent = $val;
1639		    $rerun = 1;
1640		}
1641	    } elsif ($sel eq "d") {
1642		if (vcs_is_git()) {
1643		    $email_git_since = $str;
1644		} elsif (vcs_is_hg()) {
1645		    $email_hg_since = $str;
1646		}
1647		$rerun = 1;
1648	    } elsif ($sel eq "t") {
1649		bool_invert(\$email_git_all_signature_types);
1650		$rerun = 1;
1651	    } elsif ($sel eq "f") {
1652		bool_invert(\$file_emails);
1653		$rerun = 1;
1654	    } elsif ($sel eq "r") {
1655		bool_invert(\$email_remove_duplicates);
1656		$rerun = 1;
1657	    } elsif ($sel eq "m") {
1658		bool_invert(\$email_use_mailmap);
1659		read_mailmap();
1660		$rerun = 1;
1661	    } elsif ($sel eq "k") {
1662		bool_invert(\$keywords);
1663		$rerun = 1;
1664	    } elsif ($sel eq "p") {
1665		if ($str ne "" && $val >= 0) {
1666		    $pattern_depth = $val;
1667		    $rerun = 1;
1668		}
1669	    } elsif ($sel eq "h" || $sel eq "?") {
1670		print STDERR <<EOT
1671
1672Interactive mode allows you to select the various maintainers, submitters,
1673commit signers and mailing lists that could be CC'd on a patch.
1674
1675Any *'d entry is selected.
1676
1677If you have git or hg installed, you can choose to summarize the commit
1678history of files in the patch.  Also, each line of the current file can
1679be matched to its commit author and that commits signers with blame.
1680
1681Various knobs exist to control the length of time for active commit
1682tracking, the maximum number of commit authors and signers to add,
1683and such.
1684
1685Enter selections at the prompt until you are satisfied that the selected
1686maintainers are appropriate.  You may enter multiple selections separated
1687by either commas or spaces.
1688
1689EOT
1690	    } else {
1691		print STDERR "invalid option: '$nr'\n";
1692		$redraw = 0;
1693	    }
1694	}
1695	if ($rerun) {
1696	    print STDERR "git-blame can be very slow, please have patience..."
1697		if ($email_git_blame);
1698	    goto &get_maintainers;
1699	}
1700    }
1701
1702    #drop not selected entries
1703    $count = 0;
1704    my @new_emailto = ();
1705    foreach my $entry (@list) {
1706	if ($selected{$count}) {
1707	    push(@new_emailto, $list[$count]);
1708	}
1709	$count++;
1710    }
1711    return @new_emailto;
1712}
1713
1714sub bool_invert {
1715    my ($bool_ref) = @_;
1716
1717    if ($$bool_ref) {
1718	$$bool_ref = 0;
1719    } else {
1720	$$bool_ref = 1;
1721    }
1722}
1723
1724sub deduplicate_email {
1725    my ($email) = @_;
1726
1727    my $matched = 0;
1728    my ($name, $address) = parse_email($email);
1729    $email = format_email($name, $address, 1);
1730    $email = mailmap_email($email);
1731
1732    return $email if (!$email_remove_duplicates);
1733
1734    ($name, $address) = parse_email($email);
1735
1736    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
1737	$name = $deduplicate_name_hash{lc($name)}->[0];
1738	$address = $deduplicate_name_hash{lc($name)}->[1];
1739	$matched = 1;
1740    } elsif ($deduplicate_address_hash{lc($address)}) {
1741	$name = $deduplicate_address_hash{lc($address)}->[0];
1742	$address = $deduplicate_address_hash{lc($address)}->[1];
1743	$matched = 1;
1744    }
1745    if (!$matched) {
1746	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
1747	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
1748    }
1749    $email = format_email($name, $address, 1);
1750    $email = mailmap_email($email);
1751    return $email;
1752}
1753
1754sub save_commits_by_author {
1755    my (@lines) = @_;
1756
1757    my @authors = ();
1758    my @commits = ();
1759    my @subjects = ();
1760
1761    foreach my $line (@lines) {
1762	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1763	    my $author = $1;
1764	    $author = deduplicate_email($author);
1765	    push(@authors, $author);
1766	}
1767	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
1768	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
1769    }
1770
1771    for (my $i = 0; $i < @authors; $i++) {
1772	my $exists = 0;
1773	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
1774	    if (@{$ref}[0] eq $commits[$i] &&
1775		@{$ref}[1] eq $subjects[$i]) {
1776		$exists = 1;
1777		last;
1778	    }
1779	}
1780	if (!$exists) {
1781	    push(@{$commit_author_hash{$authors[$i]}},
1782		 [ ($commits[$i], $subjects[$i]) ]);
1783	}
1784    }
1785}
1786
1787sub save_commits_by_signer {
1788    my (@lines) = @_;
1789
1790    my $commit = "";
1791    my $subject = "";
1792
1793    foreach my $line (@lines) {
1794	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
1795	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
1796	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
1797	    my @signatures = ($line);
1798	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1799	    my @types = @$types_ref;
1800	    my @signers = @$signers_ref;
1801
1802	    my $type = $types[0];
1803	    my $signer = $signers[0];
1804
1805	    $signer = deduplicate_email($signer);
1806
1807	    my $exists = 0;
1808	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
1809		if (@{$ref}[0] eq $commit &&
1810		    @{$ref}[1] eq $subject &&
1811		    @{$ref}[2] eq $type) {
1812		    $exists = 1;
1813		    last;
1814		}
1815	    }
1816	    if (!$exists) {
1817		push(@{$commit_signer_hash{$signer}},
1818		     [ ($commit, $subject, $type) ]);
1819	    }
1820	}
1821    }
1822}
1823
1824sub vcs_assign {
1825    my ($role, $divisor, @lines) = @_;
1826
1827    my %hash;
1828    my $count = 0;
1829
1830    return if (@lines <= 0);
1831
1832    if ($divisor <= 0) {
1833	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
1834	$divisor = 1;
1835    }
1836
1837    @lines = mailmap(@lines);
1838
1839    return if (@lines <= 0);
1840
1841    @lines = sort(@lines);
1842
1843    # uniq -c
1844    $hash{$_}++ for @lines;
1845
1846    # sort -rn
1847    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
1848	my $sign_offs = $hash{$line};
1849	my $percent = $sign_offs * 100 / $divisor;
1850
1851	$percent = 100 if ($percent > 100);
1852	$count++;
1853	last if ($sign_offs < $email_git_min_signatures ||
1854		 $count > $email_git_max_maintainers ||
1855		 $percent < $email_git_min_percent);
1856	push_email_address($line, '');
1857	if ($output_rolestats) {
1858	    my $fmt_percent = sprintf("%.0f", $percent);
1859	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
1860	} else {
1861	    add_role($line, $role);
1862	}
1863    }
1864}
1865
1866sub vcs_file_signoffs {
1867    my ($file) = @_;
1868
1869    my $authors_ref;
1870    my $signers_ref;
1871    my $stats_ref;
1872    my @authors = ();
1873    my @signers = ();
1874    my @stats = ();
1875    my $commits;
1876
1877    $vcs_used = vcs_exists();
1878    return if (!$vcs_used);
1879
1880    my $cmd = $VCS_cmds{"find_signers_cmd"};
1881    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
1882
1883    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
1884
1885    @signers = @{$signers_ref} if defined $signers_ref;
1886    @authors = @{$authors_ref} if defined $authors_ref;
1887    @stats = @{$stats_ref} if defined $stats_ref;
1888
1889#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
1890
1891    foreach my $signer (@signers) {
1892	$signer = deduplicate_email($signer);
1893    }
1894
1895    vcs_assign("commit_signer", $commits, @signers);
1896    vcs_assign("authored", $commits, @authors);
1897    if ($#authors == $#stats) {
1898	my $stat_pattern = $VCS_cmds{"stat_pattern"};
1899	$stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern
1900
1901	my $added = 0;
1902	my $deleted = 0;
1903	for (my $i = 0; $i <= $#stats; $i++) {
1904	    if ($stats[$i] =~ /$stat_pattern/) {
1905		$added += $1;
1906		$deleted += $2;
1907	    }
1908	}
1909	my @tmp_authors = uniq(@authors);
1910	foreach my $author (@tmp_authors) {
1911	    $author = deduplicate_email($author);
1912	}
1913	@tmp_authors = uniq(@tmp_authors);
1914	my @list_added = ();
1915	my @list_deleted = ();
1916	foreach my $author (@tmp_authors) {
1917	    my $auth_added = 0;
1918	    my $auth_deleted = 0;
1919	    for (my $i = 0; $i <= $#stats; $i++) {
1920		if ($author eq deduplicate_email($authors[$i]) &&
1921		    $stats[$i] =~ /$stat_pattern/) {
1922		    $auth_added += $1;
1923		    $auth_deleted += $2;
1924		}
1925	    }
1926	    for (my $i = 0; $i < $auth_added; $i++) {
1927		push(@list_added, $author);
1928	    }
1929	    for (my $i = 0; $i < $auth_deleted; $i++) {
1930		push(@list_deleted, $author);
1931	    }
1932	}
1933	vcs_assign("added_lines", $added, @list_added);
1934	vcs_assign("removed_lines", $deleted, @list_deleted);
1935    }
1936}
1937
1938sub vcs_file_blame {
1939    my ($file) = @_;
1940
1941    my @signers = ();
1942    my @all_commits = ();
1943    my @commits = ();
1944    my $total_commits;
1945    my $total_lines;
1946
1947    $vcs_used = vcs_exists();
1948    return if (!$vcs_used);
1949
1950    @all_commits = vcs_blame($file);
1951    @commits = uniq(@all_commits);
1952    $total_commits = @commits;
1953    $total_lines = @all_commits;
1954
1955    if ($email_git_blame_signatures) {
1956	if (vcs_is_hg()) {
1957	    my $commit_count;
1958	    my $commit_authors_ref;
1959	    my $commit_signers_ref;
1960	    my $stats_ref;
1961	    my @commit_authors = ();
1962	    my @commit_signers = ();
1963	    my $commit = join(" -r ", @commits);
1964	    my $cmd;
1965
1966	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
1967	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1968
1969	    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
1970	    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
1971	    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
1972
1973	    push(@signers, @commit_signers);
1974	} else {
1975	    foreach my $commit (@commits) {
1976		my $commit_count;
1977		my $commit_authors_ref;
1978		my $commit_signers_ref;
1979		my $stats_ref;
1980		my @commit_authors = ();
1981		my @commit_signers = ();
1982		my $cmd;
1983
1984		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
1985		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1986
1987		($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
1988		@commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
1989		@commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
1990
1991		push(@signers, @commit_signers);
1992	    }
1993	}
1994    }
1995
1996    if ($from_filename) {
1997	if ($output_rolestats) {
1998	    my @blame_signers;
1999	    if (vcs_is_hg()) {{		# Double brace for last exit
2000		my $commit_count;
2001		my @commit_signers = ();
2002		@commits = uniq(@commits);
2003		@commits = sort(@commits);
2004		my $commit = join(" -r ", @commits);
2005		my $cmd;
2006
2007		$cmd = $VCS_cmds{"find_commit_author_cmd"};
2008		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2009
2010		my @lines = ();
2011
2012		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
2013
2014		if (!$email_git_penguin_chiefs) {
2015		    @lines = grep(!/${penguin_chiefs}/i, @lines);
2016		}
2017
2018		last if !@lines;
2019
2020		my @authors = ();
2021		foreach my $line (@lines) {
2022		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2023			my $author = $1;
2024			$author = deduplicate_email($author);
2025			push(@authors, $author);
2026		    }
2027		}
2028
2029		save_commits_by_author(@lines) if ($interactive);
2030		save_commits_by_signer(@lines) if ($interactive);
2031
2032		push(@signers, @authors);
2033	    }}
2034	    else {
2035		foreach my $commit (@commits) {
2036		    my $i;
2037		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2038		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2039		    my @author = vcs_find_author($cmd);
2040		    next if !@author;
2041
2042		    my $formatted_author = deduplicate_email($author[0]);
2043
2044		    my $count = grep(/$commit/, @all_commits);
2045		    for ($i = 0; $i < $count ; $i++) {
2046			push(@blame_signers, $formatted_author);
2047		    }
2048		}
2049	    }
2050	    if (@blame_signers) {
2051		vcs_assign("authored lines", $total_lines, @blame_signers);
2052	    }
2053	}
2054	foreach my $signer (@signers) {
2055	    $signer = deduplicate_email($signer);
2056	}
2057	vcs_assign("commits", $total_commits, @signers);
2058    } else {
2059	foreach my $signer (@signers) {
2060	    $signer = deduplicate_email($signer);
2061	}
2062	vcs_assign("modified commits", $total_commits, @signers);
2063    }
2064}
2065
2066sub uniq {
2067    my (@parms) = @_;
2068
2069    my %saw;
2070    @parms = grep(!$saw{$_}++, @parms);
2071    return @parms;
2072}
2073
2074sub sort_and_uniq {
2075    my (@parms) = @_;
2076
2077    my %saw;
2078    @parms = sort @parms;
2079    @parms = grep(!$saw{$_}++, @parms);
2080    return @parms;
2081}
2082
2083sub clean_file_emails {
2084    my (@file_emails) = @_;
2085    my @fmt_emails = ();
2086
2087    foreach my $email (@file_emails) {
2088	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2089	my ($name, $address) = parse_email($email);
2090	if ($name eq '"[,\.]"') {
2091	    $name = "";
2092	}
2093
2094	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
2095	if (@nw > 2) {
2096	    my $first = $nw[@nw - 3];
2097	    my $middle = $nw[@nw - 2];
2098	    my $last = $nw[@nw - 1];
2099
2100	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
2101		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2102		(length($middle) == 1 ||
2103		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2104		$name = "$first $middle $last";
2105	    } else {
2106		$name = "$middle $last";
2107	    }
2108	}
2109
2110	if (substr($name, -1) =~ /[,\.]/) {
2111	    $name = substr($name, 0, length($name) - 1);
2112	} elsif (substr($name, -2) =~ /[,\.]"/) {
2113	    $name = substr($name, 0, length($name) - 2) . '"';
2114	}
2115
2116	if (substr($name, 0, 1) =~ /[,\.]/) {
2117	    $name = substr($name, 1, length($name) - 1);
2118	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
2119	    $name = '"' . substr($name, 2, length($name) - 2);
2120	}
2121
2122	my $fmt_email = format_email($name, $address, $email_usename);
2123	push(@fmt_emails, $fmt_email);
2124    }
2125    return @fmt_emails;
2126}
2127
2128sub merge_email {
2129    my @lines;
2130    my %saw;
2131
2132    for (@_) {
2133	my ($address, $role) = @$_;
2134	if (!$saw{$address}) {
2135	    if ($output_roles) {
2136		push(@lines, "$address ($role)");
2137	    } else {
2138		push(@lines, $address);
2139	    }
2140	    $saw{$address} = 1;
2141	}
2142    }
2143
2144    return @lines;
2145}
2146
2147sub output {
2148    my (@parms) = @_;
2149
2150    if ($output_multiline) {
2151	foreach my $line (@parms) {
2152	    print("${line}\n");
2153	}
2154    } else {
2155	print(join($output_separator, @parms));
2156	print("\n");
2157    }
2158}
2159
2160my $rfc822re;
2161
2162sub make_rfc822re {
2163#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2164#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2165#   This regexp will only work on addresses which have had comments stripped
2166#   and replaced with rfc822_lwsp.
2167
2168    my $specials = '()<>@,;:\\\\".\\[\\]';
2169    my $controls = '\\000-\\037\\177';
2170
2171    my $dtext = "[^\\[\\]\\r\\\\]";
2172    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2173
2174    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2175
2176#   Use zero-width assertion to spot the limit of an atom.  A simple
2177#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2178    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2179    my $word = "(?:$atom|$quoted_string)";
2180    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2181
2182    my $sub_domain = "(?:$atom|$domain_literal)";
2183    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2184
2185    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2186
2187    my $phrase = "$word*";
2188    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2189    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2190    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2191
2192    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2193    my $address = "(?:$mailbox|$group)";
2194
2195    return "$rfc822_lwsp*$address";
2196}
2197
2198sub rfc822_strip_comments {
2199    my $s = shift;
2200#   Recursively remove comments, and replace with a single space.  The simpler
2201#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2202#   chars in atoms, for example.
2203
2204    while ($s =~ s/^((?:[^"\\]|\\.)*
2205                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2206                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2207    return $s;
2208}
2209
2210#   valid: returns true if the parameter is an RFC822 valid address
2211#
2212sub rfc822_valid {
2213    my $s = rfc822_strip_comments(shift);
2214
2215    if (!$rfc822re) {
2216        $rfc822re = make_rfc822re();
2217    }
2218
2219    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2220}
2221
2222#   validlist: In scalar context, returns true if the parameter is an RFC822
2223#              valid list of addresses.
2224#
2225#              In list context, returns an empty list on failure (an invalid
2226#              address was found); otherwise a list whose first element is the
2227#              number of addresses found and whose remaining elements are the
2228#              addresses.  This is needed to disambiguate failure (invalid)
2229#              from success with no addresses found, because an empty string is
2230#              a valid list.
2231
2232sub rfc822_validlist {
2233    my $s = rfc822_strip_comments(shift);
2234
2235    if (!$rfc822re) {
2236        $rfc822re = make_rfc822re();
2237    }
2238    # * null list items are valid according to the RFC
2239    # * the '1' business is to aid in distinguishing failure from no results
2240
2241    my @r;
2242    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2243	$s =~ m/^$rfc822_char*$/) {
2244        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2245            push(@r, $1);
2246        }
2247        return wantarray ? (scalar(@r), @r) : 1;
2248    }
2249    return wantarray ? () : 0;
2250}
2251