xref: /openbmc/linux/scripts/get_maintainer.pl (revision afc98d90)
1#!/usr/bin/perl -w
2# (c) 2007, Joe Perches <joe@perches.com>
3#           created from checkpatch.pl
4#
5# Print selected MAINTAINERS information for
6# the files modified in a patch or for a file
7#
8# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
9#        perl scripts/get_maintainer.pl [OPTIONS] -f <file>
10#
11# Licensed under the terms of the GNU GPL License version 2
12
13use strict;
14
15my $P = $0;
16my $V = '0.26';
17
18use Getopt::Long qw(:config no_auto_abbrev);
19
20my $lk_path = "./";
21my $email = 1;
22my $email_usename = 1;
23my $email_maintainer = 1;
24my $email_list = 1;
25my $email_subscriber_list = 0;
26my $email_git_penguin_chiefs = 0;
27my $email_git = 0;
28my $email_git_all_signature_types = 0;
29my $email_git_blame = 0;
30my $email_git_blame_signatures = 1;
31my $email_git_fallback = 1;
32my $email_git_min_signatures = 1;
33my $email_git_max_maintainers = 5;
34my $email_git_min_percent = 5;
35my $email_git_since = "1-year-ago";
36my $email_hg_since = "-365";
37my $interactive = 0;
38my $email_remove_duplicates = 1;
39my $email_use_mailmap = 1;
40my $output_multiline = 1;
41my $output_separator = ", ";
42my $output_roles = 0;
43my $output_rolestats = 1;
44my $scm = 0;
45my $web = 0;
46my $subsystem = 0;
47my $status = 0;
48my $keywords = 1;
49my $sections = 0;
50my $file_emails = 0;
51my $from_filename = 0;
52my $pattern_depth = 0;
53my $version = 0;
54my $help = 0;
55
56my $vcs_used = 0;
57
58my $exit = 0;
59
60my %commit_author_hash;
61my %commit_signer_hash;
62
63my @penguin_chief = ();
64push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
65#Andrew wants in on most everything - 2009/01/14
66#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
67
68my @penguin_chief_names = ();
69foreach my $chief (@penguin_chief) {
70    if ($chief =~ m/^(.*):(.*)/) {
71	my $chief_name = $1;
72	my $chief_addr = $2;
73	push(@penguin_chief_names, $chief_name);
74    }
75}
76my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
77
78# Signature types of people who are either
79# 	a) responsible for the code in question, or
80# 	b) familiar enough with it to give relevant feedback
81my @signature_tags = ();
82push(@signature_tags, "Signed-off-by:");
83push(@signature_tags, "Reviewed-by:");
84push(@signature_tags, "Acked-by:");
85
86my $signature_pattern = "\(" . join("|", @signature_tags) . "\)";
87
88# rfc822 email address - preloaded methods go here.
89my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
90my $rfc822_char = '[\\000-\\377]';
91
92# VCS command support: class-like functions and strings
93
94my %VCS_cmds;
95
96my %VCS_cmds_git = (
97    "execute_cmd" => \&git_execute_cmd,
98    "available" => '(which("git") ne "") && (-e ".git")',
99    "find_signers_cmd" =>
100	"git log --no-color --follow --since=\$email_git_since " .
101	    '--numstat --no-merges ' .
102	    '--format="GitCommit: %H%n' .
103		      'GitAuthor: %an <%ae>%n' .
104		      'GitDate: %aD%n' .
105		      'GitSubject: %s%n' .
106		      '%b%n"' .
107	    " -- \$file",
108    "find_commit_signers_cmd" =>
109	"git log --no-color " .
110	    '--numstat ' .
111	    '--format="GitCommit: %H%n' .
112		      'GitAuthor: %an <%ae>%n' .
113		      'GitDate: %aD%n' .
114		      'GitSubject: %s%n' .
115		      '%b%n"' .
116	    " -1 \$commit",
117    "find_commit_author_cmd" =>
118	"git log --no-color " .
119	    '--numstat ' .
120	    '--format="GitCommit: %H%n' .
121		      'GitAuthor: %an <%ae>%n' .
122		      'GitDate: %aD%n' .
123		      'GitSubject: %s%n"' .
124	    " -1 \$commit",
125    "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
126    "blame_file_cmd" => "git blame -l \$file",
127    "commit_pattern" => "^GitCommit: ([0-9a-f]{40,40})",
128    "blame_commit_pattern" => "^([0-9a-f]+) ",
129    "author_pattern" => "^GitAuthor: (.*)",
130    "subject_pattern" => "^GitSubject: (.*)",
131    "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$",
132);
133
134my %VCS_cmds_hg = (
135    "execute_cmd" => \&hg_execute_cmd,
136    "available" => '(which("hg") ne "") && (-d ".hg")',
137    "find_signers_cmd" =>
138	"hg log --date=\$email_hg_since " .
139	    "--template='HgCommit: {node}\\n" .
140	                "HgAuthor: {author}\\n" .
141			"HgSubject: {desc}\\n'" .
142	    " -- \$file",
143    "find_commit_signers_cmd" =>
144	"hg log " .
145	    "--template='HgSubject: {desc}\\n'" .
146	    " -r \$commit",
147    "find_commit_author_cmd" =>
148	"hg log " .
149	    "--template='HgCommit: {node}\\n" .
150		        "HgAuthor: {author}\\n" .
151			"HgSubject: {desc|firstline}\\n'" .
152	    " -r \$commit",
153    "blame_range_cmd" => "",		# not supported
154    "blame_file_cmd" => "hg blame -n \$file",
155    "commit_pattern" => "^HgCommit: ([0-9a-f]{40,40})",
156    "blame_commit_pattern" => "^([ 0-9a-f]+):",
157    "author_pattern" => "^HgAuthor: (.*)",
158    "subject_pattern" => "^HgSubject: (.*)",
159    "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$",
160);
161
162my $conf = which_conf(".get_maintainer.conf");
163if (-f $conf) {
164    my @conf_args;
165    open(my $conffile, '<', "$conf")
166	or warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
167
168    while (<$conffile>) {
169	my $line = $_;
170
171	$line =~ s/\s*\n?$//g;
172	$line =~ s/^\s*//g;
173	$line =~ s/\s+/ /g;
174
175	next if ($line =~ m/^\s*#/);
176	next if ($line =~ m/^\s*$/);
177
178	my @words = split(" ", $line);
179	foreach my $word (@words) {
180	    last if ($word =~ m/^#/);
181	    push (@conf_args, $word);
182	}
183    }
184    close($conffile);
185    unshift(@ARGV, @conf_args) if @conf_args;
186}
187
188if (!GetOptions(
189		'email!' => \$email,
190		'git!' => \$email_git,
191		'git-all-signature-types!' => \$email_git_all_signature_types,
192		'git-blame!' => \$email_git_blame,
193		'git-blame-signatures!' => \$email_git_blame_signatures,
194		'git-fallback!' => \$email_git_fallback,
195		'git-chief-penguins!' => \$email_git_penguin_chiefs,
196		'git-min-signatures=i' => \$email_git_min_signatures,
197		'git-max-maintainers=i' => \$email_git_max_maintainers,
198		'git-min-percent=i' => \$email_git_min_percent,
199		'git-since=s' => \$email_git_since,
200		'hg-since=s' => \$email_hg_since,
201		'i|interactive!' => \$interactive,
202		'remove-duplicates!' => \$email_remove_duplicates,
203		'mailmap!' => \$email_use_mailmap,
204		'm!' => \$email_maintainer,
205		'n!' => \$email_usename,
206		'l!' => \$email_list,
207		's!' => \$email_subscriber_list,
208		'multiline!' => \$output_multiline,
209		'roles!' => \$output_roles,
210		'rolestats!' => \$output_rolestats,
211		'separator=s' => \$output_separator,
212		'subsystem!' => \$subsystem,
213		'status!' => \$status,
214		'scm!' => \$scm,
215		'web!' => \$web,
216		'pattern-depth=i' => \$pattern_depth,
217		'k|keywords!' => \$keywords,
218		'sections!' => \$sections,
219		'fe|file-emails!' => \$file_emails,
220		'f|file' => \$from_filename,
221		'v|version' => \$version,
222		'h|help|usage' => \$help,
223		)) {
224    die "$P: invalid argument - use --help if necessary\n";
225}
226
227if ($help != 0) {
228    usage();
229    exit 0;
230}
231
232if ($version != 0) {
233    print("${P} ${V}\n");
234    exit 0;
235}
236
237if (-t STDIN && !@ARGV) {
238    # We're talking to a terminal, but have no command line arguments.
239    die "$P: missing patchfile or -f file - use --help if necessary\n";
240}
241
242$output_multiline = 0 if ($output_separator ne ", ");
243$output_rolestats = 1 if ($interactive);
244$output_roles = 1 if ($output_rolestats);
245
246if ($sections) {
247    $email = 0;
248    $email_list = 0;
249    $scm = 0;
250    $status = 0;
251    $subsystem = 0;
252    $web = 0;
253    $keywords = 0;
254    $interactive = 0;
255} else {
256    my $selections = $email + $scm + $status + $subsystem + $web;
257    if ($selections == 0) {
258	die "$P:  Missing required option: email, scm, status, subsystem or web\n";
259    }
260}
261
262if ($email &&
263    ($email_maintainer + $email_list + $email_subscriber_list +
264     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
265    die "$P: Please select at least 1 email option\n";
266}
267
268if (!top_of_kernel_tree($lk_path)) {
269    die "$P: The current directory does not appear to be "
270	. "a linux kernel source tree.\n";
271}
272
273## Read MAINTAINERS for type/value pairs
274
275my @typevalue = ();
276my %keyword_hash;
277
278open (my $maint, '<', "${lk_path}MAINTAINERS")
279    or die "$P: Can't open MAINTAINERS: $!\n";
280while (<$maint>) {
281    my $line = $_;
282
283    if ($line =~ m/^(\C):\s*(.*)/) {
284	my $type = $1;
285	my $value = $2;
286
287	##Filename pattern matching
288	if ($type eq "F" || $type eq "X") {
289	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
290	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
291	    $value =~ s/\?/\./g;         ##Convert ? to .
292	    ##if pattern is a directory and it lacks a trailing slash, add one
293	    if ((-d $value)) {
294		$value =~ s@([^/])$@$1/@;
295	    }
296	} elsif ($type eq "K") {
297	    $keyword_hash{@typevalue} = $value;
298	}
299	push(@typevalue, "$type:$value");
300    } elsif (!/^(\s)*$/) {
301	$line =~ s/\n$//g;
302	push(@typevalue, $line);
303    }
304}
305close($maint);
306
307
308#
309# Read mail address map
310#
311
312my $mailmap;
313
314read_mailmap();
315
316sub read_mailmap {
317    $mailmap = {
318	names => {},
319	addresses => {}
320    };
321
322    return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
323
324    open(my $mailmap_file, '<', "${lk_path}.mailmap")
325	or warn "$P: Can't open .mailmap: $!\n";
326
327    while (<$mailmap_file>) {
328	s/#.*$//; #strip comments
329	s/^\s+|\s+$//g; #trim
330
331	next if (/^\s*$/); #skip empty lines
332	#entries have one of the following formats:
333	# name1 <mail1>
334	# <mail1> <mail2>
335	# name1 <mail1> <mail2>
336	# name1 <mail1> name2 <mail2>
337	# (see man git-shortlog)
338
339	if (/^([^<]+)<([^>]+)>$/) {
340	    my $real_name = $1;
341	    my $address = $2;
342
343	    $real_name =~ s/\s+$//;
344	    ($real_name, $address) = parse_email("$real_name <$address>");
345	    $mailmap->{names}->{$address} = $real_name;
346
347	} elsif (/^<([^>]+)>\s*<([^>]+)>$/) {
348	    my $real_address = $1;
349	    my $wrong_address = $2;
350
351	    $mailmap->{addresses}->{$wrong_address} = $real_address;
352
353	} elsif (/^(.+)<([^>]+)>\s*<([^>]+)>$/) {
354	    my $real_name = $1;
355	    my $real_address = $2;
356	    my $wrong_address = $3;
357
358	    $real_name =~ s/\s+$//;
359	    ($real_name, $real_address) =
360		parse_email("$real_name <$real_address>");
361	    $mailmap->{names}->{$wrong_address} = $real_name;
362	    $mailmap->{addresses}->{$wrong_address} = $real_address;
363
364	} elsif (/^(.+)<([^>]+)>\s*(.+)\s*<([^>]+)>$/) {
365	    my $real_name = $1;
366	    my $real_address = $2;
367	    my $wrong_name = $3;
368	    my $wrong_address = $4;
369
370	    $real_name =~ s/\s+$//;
371	    ($real_name, $real_address) =
372		parse_email("$real_name <$real_address>");
373
374	    $wrong_name =~ s/\s+$//;
375	    ($wrong_name, $wrong_address) =
376		parse_email("$wrong_name <$wrong_address>");
377
378	    my $wrong_email = format_email($wrong_name, $wrong_address, 1);
379	    $mailmap->{names}->{$wrong_email} = $real_name;
380	    $mailmap->{addresses}->{$wrong_email} = $real_address;
381	}
382    }
383    close($mailmap_file);
384}
385
386## use the filenames on the command line or find the filenames in the patchfiles
387
388my @files = ();
389my @range = ();
390my @keyword_tvi = ();
391my @file_emails = ();
392
393if (!@ARGV) {
394    push(@ARGV, "&STDIN");
395}
396
397foreach my $file (@ARGV) {
398    if ($file ne "&STDIN") {
399	##if $file is a directory and it lacks a trailing slash, add one
400	if ((-d $file)) {
401	    $file =~ s@([^/])$@$1/@;
402	} elsif (!(-f $file)) {
403	    die "$P: file '${file}' not found\n";
404	}
405    }
406    if ($from_filename) {
407	push(@files, $file);
408	if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
409	    open(my $f, '<', $file)
410		or die "$P: Can't open $file: $!\n";
411	    my $text = do { local($/) ; <$f> };
412	    close($f);
413	    if ($keywords) {
414		foreach my $line (keys %keyword_hash) {
415		    if ($text =~ m/$keyword_hash{$line}/x) {
416			push(@keyword_tvi, $line);
417		    }
418		}
419	    }
420	    if ($file_emails) {
421		my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
422		push(@file_emails, clean_file_emails(@poss_addr));
423	    }
424	}
425    } else {
426	my $file_cnt = @files;
427	my $lastfile;
428
429	open(my $patch, "< $file")
430	    or die "$P: Can't open $file: $!\n";
431
432	# We can check arbitrary information before the patch
433	# like the commit message, mail headers, etc...
434	# This allows us to match arbitrary keywords against any part
435	# of a git format-patch generated file (subject tags, etc...)
436
437	my $patch_prefix = "";			#Parsing the intro
438
439	while (<$patch>) {
440	    my $patch_line = $_;
441	    if (m/^\+\+\+\s+(\S+)/ or m/^---\s+(\S+)/) {
442		my $filename = $1;
443		$filename =~ s@^[^/]*/@@;
444		$filename =~ s@\n@@;
445		$lastfile = $filename;
446		push(@files, $filename);
447		$patch_prefix = "^[+-].*";	#Now parsing the actual patch
448	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
449		if ($email_git_blame) {
450		    push(@range, "$lastfile:$1:$2");
451		}
452	    } elsif ($keywords) {
453		foreach my $line (keys %keyword_hash) {
454		    if ($patch_line =~ m/${patch_prefix}$keyword_hash{$line}/x) {
455			push(@keyword_tvi, $line);
456		    }
457		}
458	    }
459	}
460	close($patch);
461
462	if ($file_cnt == @files) {
463	    warn "$P: file '${file}' doesn't appear to be a patch.  "
464		. "Add -f to options?\n";
465	}
466	@files = sort_and_uniq(@files);
467    }
468}
469
470@file_emails = uniq(@file_emails);
471
472my %email_hash_name;
473my %email_hash_address;
474my @email_to = ();
475my %hash_list_to;
476my @list_to = ();
477my @scm = ();
478my @web = ();
479my @subsystem = ();
480my @status = ();
481my %deduplicate_name_hash = ();
482my %deduplicate_address_hash = ();
483
484my @maintainers = get_maintainers();
485
486if (@maintainers) {
487    @maintainers = merge_email(@maintainers);
488    output(@maintainers);
489}
490
491if ($scm) {
492    @scm = uniq(@scm);
493    output(@scm);
494}
495
496if ($status) {
497    @status = uniq(@status);
498    output(@status);
499}
500
501if ($subsystem) {
502    @subsystem = uniq(@subsystem);
503    output(@subsystem);
504}
505
506if ($web) {
507    @web = uniq(@web);
508    output(@web);
509}
510
511exit($exit);
512
513sub range_is_maintained {
514    my ($start, $end) = @_;
515
516    for (my $i = $start; $i < $end; $i++) {
517	my $line = $typevalue[$i];
518	if ($line =~ m/^(\C):\s*(.*)/) {
519	    my $type = $1;
520	    my $value = $2;
521	    if ($type eq 'S') {
522		if ($value =~ /(maintain|support)/i) {
523		    return 1;
524		}
525	    }
526	}
527    }
528    return 0;
529}
530
531sub range_has_maintainer {
532    my ($start, $end) = @_;
533
534    for (my $i = $start; $i < $end; $i++) {
535	my $line = $typevalue[$i];
536	if ($line =~ m/^(\C):\s*(.*)/) {
537	    my $type = $1;
538	    my $value = $2;
539	    if ($type eq 'M') {
540		return 1;
541	    }
542	}
543    }
544    return 0;
545}
546
547sub get_maintainers {
548    %email_hash_name = ();
549    %email_hash_address = ();
550    %commit_author_hash = ();
551    %commit_signer_hash = ();
552    @email_to = ();
553    %hash_list_to = ();
554    @list_to = ();
555    @scm = ();
556    @web = ();
557    @subsystem = ();
558    @status = ();
559    %deduplicate_name_hash = ();
560    %deduplicate_address_hash = ();
561    if ($email_git_all_signature_types) {
562	$signature_pattern = "(.+?)[Bb][Yy]:";
563    } else {
564	$signature_pattern = "\(" . join("|", @signature_tags) . "\)";
565    }
566
567    # Find responsible parties
568
569    my %exact_pattern_match_hash = ();
570
571    foreach my $file (@files) {
572
573	my %hash;
574	my $tvi = find_first_section();
575	while ($tvi < @typevalue) {
576	    my $start = find_starting_index($tvi);
577	    my $end = find_ending_index($tvi);
578	    my $exclude = 0;
579	    my $i;
580
581	    #Do not match excluded file patterns
582
583	    for ($i = $start; $i < $end; $i++) {
584		my $line = $typevalue[$i];
585		if ($line =~ m/^(\C):\s*(.*)/) {
586		    my $type = $1;
587		    my $value = $2;
588		    if ($type eq 'X') {
589			if (file_match_pattern($file, $value)) {
590			    $exclude = 1;
591			    last;
592			}
593		    }
594		}
595	    }
596
597	    if (!$exclude) {
598		for ($i = $start; $i < $end; $i++) {
599		    my $line = $typevalue[$i];
600		    if ($line =~ m/^(\C):\s*(.*)/) {
601			my $type = $1;
602			my $value = $2;
603			if ($type eq 'F') {
604			    if (file_match_pattern($file, $value)) {
605				my $value_pd = ($value =~ tr@/@@);
606				my $file_pd = ($file  =~ tr@/@@);
607				$value_pd++ if (substr($value,-1,1) ne "/");
608				$value_pd = -1 if ($value =~ /^\.\*/);
609				if ($value_pd >= $file_pd &&
610				    range_is_maintained($start, $end) &&
611				    range_has_maintainer($start, $end)) {
612				    $exact_pattern_match_hash{$file} = 1;
613				}
614				if ($pattern_depth == 0 ||
615				    (($file_pd - $value_pd) < $pattern_depth)) {
616				    $hash{$tvi} = $value_pd;
617				}
618			    }
619			} elsif ($type eq 'N') {
620			    if ($file =~ m/$value/x) {
621				$hash{$tvi} = 0;
622			    }
623			}
624		    }
625		}
626	    }
627	    $tvi = $end + 1;
628	}
629
630	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
631	    add_categories($line);
632	    if ($sections) {
633		my $i;
634		my $start = find_starting_index($line);
635		my $end = find_ending_index($line);
636		for ($i = $start; $i < $end; $i++) {
637		    my $line = $typevalue[$i];
638		    if ($line =~ /^[FX]:/) {		##Restore file patterns
639			$line =~ s/([^\\])\.([^\*])/$1\?$2/g;
640			$line =~ s/([^\\])\.$/$1\?/g;	##Convert . back to ?
641			$line =~ s/\\\./\./g;       	##Convert \. to .
642			$line =~ s/\.\*/\*/g;       	##Convert .* to *
643		    }
644		    $line =~ s/^([A-Z]):/$1:\t/g;
645		    print("$line\n");
646		}
647		print("\n");
648	    }
649	}
650    }
651
652    if ($keywords) {
653	@keyword_tvi = sort_and_uniq(@keyword_tvi);
654	foreach my $line (@keyword_tvi) {
655	    add_categories($line);
656	}
657    }
658
659    foreach my $email (@email_to, @list_to) {
660	$email->[0] = deduplicate_email($email->[0]);
661    }
662
663    foreach my $file (@files) {
664	if ($email &&
665	    ($email_git || ($email_git_fallback &&
666			    !$exact_pattern_match_hash{$file}))) {
667	    vcs_file_signoffs($file);
668	}
669	if ($email && $email_git_blame) {
670	    vcs_file_blame($file);
671	}
672    }
673
674    if ($email) {
675	foreach my $chief (@penguin_chief) {
676	    if ($chief =~ m/^(.*):(.*)/) {
677		my $email_address;
678
679		$email_address = format_email($1, $2, $email_usename);
680		if ($email_git_penguin_chiefs) {
681		    push(@email_to, [$email_address, 'chief penguin']);
682		} else {
683		    @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
684		}
685	    }
686	}
687
688	foreach my $email (@file_emails) {
689	    my ($name, $address) = parse_email($email);
690
691	    my $tmp_email = format_email($name, $address, $email_usename);
692	    push_email_address($tmp_email, '');
693	    add_role($tmp_email, 'in file');
694	}
695    }
696
697    my @to = ();
698    if ($email || $email_list) {
699	if ($email) {
700	    @to = (@to, @email_to);
701	}
702	if ($email_list) {
703	    @to = (@to, @list_to);
704	}
705    }
706
707    if ($interactive) {
708	@to = interactive_get_maintainers(\@to);
709    }
710
711    return @to;
712}
713
714sub file_match_pattern {
715    my ($file, $pattern) = @_;
716    if (substr($pattern, -1) eq "/") {
717	if ($file =~ m@^$pattern@) {
718	    return 1;
719	}
720    } else {
721	if ($file =~ m@^$pattern@) {
722	    my $s1 = ($file =~ tr@/@@);
723	    my $s2 = ($pattern =~ tr@/@@);
724	    if ($s1 == $s2) {
725		return 1;
726	    }
727	}
728    }
729    return 0;
730}
731
732sub usage {
733    print <<EOT;
734usage: $P [options] patchfile
735       $P [options] -f file|directory
736version: $V
737
738MAINTAINER field selection options:
739  --email => print email address(es) if any
740    --git => include recent git \*-by: signers
741    --git-all-signature-types => include signers regardless of signature type
742        or use only ${signature_pattern} signers (default: $email_git_all_signature_types)
743    --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
744    --git-chief-penguins => include ${penguin_chiefs}
745    --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
746    --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
747    --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
748    --git-blame => use git blame to find modified commits for patch or file
749    --git-since => git history to use (default: $email_git_since)
750    --hg-since => hg history to use (default: $email_hg_since)
751    --interactive => display a menu (mostly useful if used with the --git option)
752    --m => include maintainer(s) if any
753    --n => include name 'Full Name <addr\@domain.tld>'
754    --l => include list(s) if any
755    --s => include subscriber only list(s) if any
756    --remove-duplicates => minimize duplicate email names/addresses
757    --roles => show roles (status:subsystem, git-signer, list, etc...)
758    --rolestats => show roles and statistics (commits/total_commits, %)
759    --file-emails => add email addresses found in -f file (default: 0 (off))
760  --scm => print SCM tree(s) if any
761  --status => print status if any
762  --subsystem => print subsystem name if any
763  --web => print website(s) if any
764
765Output type options:
766  --separator [, ] => separator for multiple entries on 1 line
767    using --separator also sets --nomultiline if --separator is not [, ]
768  --multiline => print 1 entry per line
769
770Other options:
771  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
772  --keywords => scan patch for keywords (default: $keywords)
773  --sections => print all of the subsystem sections with pattern matches
774  --mailmap => use .mailmap file (default: $email_use_mailmap)
775  --version => show version
776  --help => show this help information
777
778Default options:
779  [--email --nogit --git-fallback --m --n --l --multiline -pattern-depth=0
780   --remove-duplicates --rolestats]
781
782Notes:
783  Using "-f directory" may give unexpected results:
784      Used with "--git", git signators for _all_ files in and below
785          directory are examined as git recurses directories.
786          Any specified X: (exclude) pattern matches are _not_ ignored.
787      Used with "--nogit", directory is used as a pattern match,
788          no individual file within the directory or subdirectory
789          is matched.
790      Used with "--git-blame", does not iterate all files in directory
791  Using "--git-blame" is slow and may add old committers and authors
792      that are no longer active maintainers to the output.
793  Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
794      other automated tools that expect only ["name"] <email address>
795      may not work because of additional output after <email address>.
796  Using "--rolestats" and "--git-blame" shows the #/total=% commits,
797      not the percentage of the entire file authored.  # of commits is
798      not a good measure of amount of code authored.  1 major commit may
799      contain a thousand lines, 5 trivial commits may modify a single line.
800  If git is not installed, but mercurial (hg) is installed and an .hg
801      repository exists, the following options apply to mercurial:
802          --git,
803          --git-min-signatures, --git-max-maintainers, --git-min-percent, and
804          --git-blame
805      Use --hg-since not --git-since to control date selection
806  File ".get_maintainer.conf", if it exists in the linux kernel source root
807      directory, can change whatever get_maintainer defaults are desired.
808      Entries in this file can be any command line argument.
809      This file is prepended to any additional command line arguments.
810      Multiple lines and # comments are allowed.
811EOT
812}
813
814sub top_of_kernel_tree {
815    my ($lk_path) = @_;
816
817    if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
818	$lk_path .= "/";
819    }
820    if (   (-f "${lk_path}COPYING")
821	&& (-f "${lk_path}CREDITS")
822	&& (-f "${lk_path}Kbuild")
823	&& (-f "${lk_path}MAINTAINERS")
824	&& (-f "${lk_path}Makefile")
825	&& (-f "${lk_path}README")
826	&& (-d "${lk_path}Documentation")
827	&& (-d "${lk_path}arch")
828	&& (-d "${lk_path}include")
829	&& (-d "${lk_path}drivers")
830	&& (-d "${lk_path}fs")
831	&& (-d "${lk_path}init")
832	&& (-d "${lk_path}ipc")
833	&& (-d "${lk_path}kernel")
834	&& (-d "${lk_path}lib")
835	&& (-d "${lk_path}scripts")) {
836	return 1;
837    }
838    return 0;
839}
840
841sub parse_email {
842    my ($formatted_email) = @_;
843
844    my $name = "";
845    my $address = "";
846
847    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
848	$name = $1;
849	$address = $2;
850    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
851	$address = $1;
852    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
853	$address = $1;
854    }
855
856    $name =~ s/^\s+|\s+$//g;
857    $name =~ s/^\"|\"$//g;
858    $address =~ s/^\s+|\s+$//g;
859
860    if ($name =~ /[^\w \-]/i) {  	 ##has "must quote" chars
861	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
862	$name = "\"$name\"";
863    }
864
865    return ($name, $address);
866}
867
868sub format_email {
869    my ($name, $address, $usename) = @_;
870
871    my $formatted_email;
872
873    $name =~ s/^\s+|\s+$//g;
874    $name =~ s/^\"|\"$//g;
875    $address =~ s/^\s+|\s+$//g;
876
877    if ($name =~ /[^\w \-]/i) {          ##has "must quote" chars
878	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
879	$name = "\"$name\"";
880    }
881
882    if ($usename) {
883	if ("$name" eq "") {
884	    $formatted_email = "$address";
885	} else {
886	    $formatted_email = "$name <$address>";
887	}
888    } else {
889	$formatted_email = $address;
890    }
891
892    return $formatted_email;
893}
894
895sub find_first_section {
896    my $index = 0;
897
898    while ($index < @typevalue) {
899	my $tv = $typevalue[$index];
900	if (($tv =~ m/^(\C):\s*(.*)/)) {
901	    last;
902	}
903	$index++;
904    }
905
906    return $index;
907}
908
909sub find_starting_index {
910    my ($index) = @_;
911
912    while ($index > 0) {
913	my $tv = $typevalue[$index];
914	if (!($tv =~ m/^(\C):\s*(.*)/)) {
915	    last;
916	}
917	$index--;
918    }
919
920    return $index;
921}
922
923sub find_ending_index {
924    my ($index) = @_;
925
926    while ($index < @typevalue) {
927	my $tv = $typevalue[$index];
928	if (!($tv =~ m/^(\C):\s*(.*)/)) {
929	    last;
930	}
931	$index++;
932    }
933
934    return $index;
935}
936
937sub get_maintainer_role {
938    my ($index) = @_;
939
940    my $i;
941    my $start = find_starting_index($index);
942    my $end = find_ending_index($index);
943
944    my $role = "unknown";
945    my $subsystem = $typevalue[$start];
946    if (length($subsystem) > 20) {
947	$subsystem = substr($subsystem, 0, 17);
948	$subsystem =~ s/\s*$//;
949	$subsystem = $subsystem . "...";
950    }
951
952    for ($i = $start + 1; $i < $end; $i++) {
953	my $tv = $typevalue[$i];
954	if ($tv =~ m/^(\C):\s*(.*)/) {
955	    my $ptype = $1;
956	    my $pvalue = $2;
957	    if ($ptype eq "S") {
958		$role = $pvalue;
959	    }
960	}
961    }
962
963    $role = lc($role);
964    if      ($role eq "supported") {
965	$role = "supporter";
966    } elsif ($role eq "maintained") {
967	$role = "maintainer";
968    } elsif ($role eq "odd fixes") {
969	$role = "odd fixer";
970    } elsif ($role eq "orphan") {
971	$role = "orphan minder";
972    } elsif ($role eq "obsolete") {
973	$role = "obsolete minder";
974    } elsif ($role eq "buried alive in reporters") {
975	$role = "chief penguin";
976    }
977
978    return $role . ":" . $subsystem;
979}
980
981sub get_list_role {
982    my ($index) = @_;
983
984    my $i;
985    my $start = find_starting_index($index);
986    my $end = find_ending_index($index);
987
988    my $subsystem = $typevalue[$start];
989    if (length($subsystem) > 20) {
990	$subsystem = substr($subsystem, 0, 17);
991	$subsystem =~ s/\s*$//;
992	$subsystem = $subsystem . "...";
993    }
994
995    if ($subsystem eq "THE REST") {
996	$subsystem = "";
997    }
998
999    return $subsystem;
1000}
1001
1002sub add_categories {
1003    my ($index) = @_;
1004
1005    my $i;
1006    my $start = find_starting_index($index);
1007    my $end = find_ending_index($index);
1008
1009    push(@subsystem, $typevalue[$start]);
1010
1011    for ($i = $start + 1; $i < $end; $i++) {
1012	my $tv = $typevalue[$i];
1013	if ($tv =~ m/^(\C):\s*(.*)/) {
1014	    my $ptype = $1;
1015	    my $pvalue = $2;
1016	    if ($ptype eq "L") {
1017		my $list_address = $pvalue;
1018		my $list_additional = "";
1019		my $list_role = get_list_role($i);
1020
1021		if ($list_role ne "") {
1022		    $list_role = ":" . $list_role;
1023		}
1024		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
1025		    $list_address = $1;
1026		    $list_additional = $2;
1027		}
1028		if ($list_additional =~ m/subscribers-only/) {
1029		    if ($email_subscriber_list) {
1030			if (!$hash_list_to{lc($list_address)}) {
1031			    $hash_list_to{lc($list_address)} = 1;
1032			    push(@list_to, [$list_address,
1033					    "subscriber list${list_role}"]);
1034			}
1035		    }
1036		} else {
1037		    if ($email_list) {
1038			if (!$hash_list_to{lc($list_address)}) {
1039			    $hash_list_to{lc($list_address)} = 1;
1040			    if ($list_additional =~ m/moderated/) {
1041				push(@list_to, [$list_address,
1042						"moderated list${list_role}"]);
1043			    } else {
1044				push(@list_to, [$list_address,
1045						"open list${list_role}"]);
1046			    }
1047			}
1048		    }
1049		}
1050	    } elsif ($ptype eq "M") {
1051		my ($name, $address) = parse_email($pvalue);
1052		if ($name eq "") {
1053		    if ($i > 0) {
1054			my $tv = $typevalue[$i - 1];
1055			if ($tv =~ m/^(\C):\s*(.*)/) {
1056			    if ($1 eq "P") {
1057				$name = $2;
1058				$pvalue = format_email($name, $address, $email_usename);
1059			    }
1060			}
1061		    }
1062		}
1063		if ($email_maintainer) {
1064		    my $role = get_maintainer_role($i);
1065		    push_email_addresses($pvalue, $role);
1066		}
1067	    } elsif ($ptype eq "T") {
1068		push(@scm, $pvalue);
1069	    } elsif ($ptype eq "W") {
1070		push(@web, $pvalue);
1071	    } elsif ($ptype eq "S") {
1072		push(@status, $pvalue);
1073	    }
1074	}
1075    }
1076}
1077
1078sub email_inuse {
1079    my ($name, $address) = @_;
1080
1081    return 1 if (($name eq "") && ($address eq ""));
1082    return 1 if (($name ne "") && exists($email_hash_name{lc($name)}));
1083    return 1 if (($address ne "") && exists($email_hash_address{lc($address)}));
1084
1085    return 0;
1086}
1087
1088sub push_email_address {
1089    my ($line, $role) = @_;
1090
1091    my ($name, $address) = parse_email($line);
1092
1093    if ($address eq "") {
1094	return 0;
1095    }
1096
1097    if (!$email_remove_duplicates) {
1098	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1099    } elsif (!email_inuse($name, $address)) {
1100	push(@email_to, [format_email($name, $address, $email_usename), $role]);
1101	$email_hash_name{lc($name)}++ if ($name ne "");
1102	$email_hash_address{lc($address)}++;
1103    }
1104
1105    return 1;
1106}
1107
1108sub push_email_addresses {
1109    my ($address, $role) = @_;
1110
1111    my @address_list = ();
1112
1113    if (rfc822_valid($address)) {
1114	push_email_address($address, $role);
1115    } elsif (@address_list = rfc822_validlist($address)) {
1116	my $array_count = shift(@address_list);
1117	while (my $entry = shift(@address_list)) {
1118	    push_email_address($entry, $role);
1119	}
1120    } else {
1121	if (!push_email_address($address, $role)) {
1122	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
1123	}
1124    }
1125}
1126
1127sub add_role {
1128    my ($line, $role) = @_;
1129
1130    my ($name, $address) = parse_email($line);
1131    my $email = format_email($name, $address, $email_usename);
1132
1133    foreach my $entry (@email_to) {
1134	if ($email_remove_duplicates) {
1135	    my ($entry_name, $entry_address) = parse_email($entry->[0]);
1136	    if (($name eq $entry_name || $address eq $entry_address)
1137		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1138	    ) {
1139		if ($entry->[1] eq "") {
1140		    $entry->[1] = "$role";
1141		} else {
1142		    $entry->[1] = "$entry->[1],$role";
1143		}
1144	    }
1145	} else {
1146	    if ($email eq $entry->[0]
1147		&& ($role eq "" || !($entry->[1] =~ m/$role/))
1148	    ) {
1149		if ($entry->[1] eq "") {
1150		    $entry->[1] = "$role";
1151		} else {
1152		    $entry->[1] = "$entry->[1],$role";
1153		}
1154	    }
1155	}
1156    }
1157}
1158
1159sub which {
1160    my ($bin) = @_;
1161
1162    foreach my $path (split(/:/, $ENV{PATH})) {
1163	if (-e "$path/$bin") {
1164	    return "$path/$bin";
1165	}
1166    }
1167
1168    return "";
1169}
1170
1171sub which_conf {
1172    my ($conf) = @_;
1173
1174    foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
1175	if (-e "$path/$conf") {
1176	    return "$path/$conf";
1177	}
1178    }
1179
1180    return "";
1181}
1182
1183sub mailmap_email {
1184    my ($line) = @_;
1185
1186    my ($name, $address) = parse_email($line);
1187    my $email = format_email($name, $address, 1);
1188    my $real_name = $name;
1189    my $real_address = $address;
1190
1191    if (exists $mailmap->{names}->{$email} ||
1192	exists $mailmap->{addresses}->{$email}) {
1193	if (exists $mailmap->{names}->{$email}) {
1194	    $real_name = $mailmap->{names}->{$email};
1195	}
1196	if (exists $mailmap->{addresses}->{$email}) {
1197	    $real_address = $mailmap->{addresses}->{$email};
1198	}
1199    } else {
1200	if (exists $mailmap->{names}->{$address}) {
1201	    $real_name = $mailmap->{names}->{$address};
1202	}
1203	if (exists $mailmap->{addresses}->{$address}) {
1204	    $real_address = $mailmap->{addresses}->{$address};
1205	}
1206    }
1207    return format_email($real_name, $real_address, 1);
1208}
1209
1210sub mailmap {
1211    my (@addresses) = @_;
1212
1213    my @mapped_emails = ();
1214    foreach my $line (@addresses) {
1215	push(@mapped_emails, mailmap_email($line));
1216    }
1217    merge_by_realname(@mapped_emails) if ($email_use_mailmap);
1218    return @mapped_emails;
1219}
1220
1221sub merge_by_realname {
1222    my %address_map;
1223    my (@emails) = @_;
1224
1225    foreach my $email (@emails) {
1226	my ($name, $address) = parse_email($email);
1227	if (exists $address_map{$name}) {
1228	    $address = $address_map{$name};
1229	    $email = format_email($name, $address, 1);
1230	} else {
1231	    $address_map{$name} = $address;
1232	}
1233    }
1234}
1235
1236sub git_execute_cmd {
1237    my ($cmd) = @_;
1238    my @lines = ();
1239
1240    my $output = `$cmd`;
1241    $output =~ s/^\s*//gm;
1242    @lines = split("\n", $output);
1243
1244    return @lines;
1245}
1246
1247sub hg_execute_cmd {
1248    my ($cmd) = @_;
1249    my @lines = ();
1250
1251    my $output = `$cmd`;
1252    @lines = split("\n", $output);
1253
1254    return @lines;
1255}
1256
1257sub extract_formatted_signatures {
1258    my (@signature_lines) = @_;
1259
1260    my @type = @signature_lines;
1261
1262    s/\s*(.*):.*/$1/ for (@type);
1263
1264    # cut -f2- -d":"
1265    s/\s*.*:\s*(.+)\s*/$1/ for (@signature_lines);
1266
1267## Reformat email addresses (with names) to avoid badly written signatures
1268
1269    foreach my $signer (@signature_lines) {
1270	$signer = deduplicate_email($signer);
1271    }
1272
1273    return (\@type, \@signature_lines);
1274}
1275
1276sub vcs_find_signers {
1277    my ($cmd, $file) = @_;
1278    my $commits;
1279    my @lines = ();
1280    my @signatures = ();
1281    my @authors = ();
1282    my @stats = ();
1283
1284    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1285
1286    my $pattern = $VCS_cmds{"commit_pattern"};
1287    my $author_pattern = $VCS_cmds{"author_pattern"};
1288    my $stat_pattern = $VCS_cmds{"stat_pattern"};
1289
1290    $stat_pattern =~ s/(\$\w+)/$1/eeg;		#interpolate $stat_pattern
1291
1292    $commits = grep(/$pattern/, @lines);	# of commits
1293
1294    @authors = grep(/$author_pattern/, @lines);
1295    @signatures = grep(/^[ \t]*${signature_pattern}.*\@.*$/, @lines);
1296    @stats = grep(/$stat_pattern/, @lines);
1297
1298#    print("stats: <@stats>\n");
1299
1300    return (0, \@signatures, \@authors, \@stats) if !@signatures;
1301
1302    save_commits_by_author(@lines) if ($interactive);
1303    save_commits_by_signer(@lines) if ($interactive);
1304
1305    if (!$email_git_penguin_chiefs) {
1306	@signatures = grep(!/${penguin_chiefs}/i, @signatures);
1307    }
1308
1309    my ($author_ref, $authors_ref) = extract_formatted_signatures(@authors);
1310    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1311
1312    return ($commits, $signers_ref, $authors_ref, \@stats);
1313}
1314
1315sub vcs_find_author {
1316    my ($cmd) = @_;
1317    my @lines = ();
1318
1319    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1320
1321    if (!$email_git_penguin_chiefs) {
1322	@lines = grep(!/${penguin_chiefs}/i, @lines);
1323    }
1324
1325    return @lines if !@lines;
1326
1327    my @authors = ();
1328    foreach my $line (@lines) {
1329	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1330	    my $author = $1;
1331	    my ($name, $address) = parse_email($author);
1332	    $author = format_email($name, $address, 1);
1333	    push(@authors, $author);
1334	}
1335    }
1336
1337    save_commits_by_author(@lines) if ($interactive);
1338    save_commits_by_signer(@lines) if ($interactive);
1339
1340    return @authors;
1341}
1342
1343sub vcs_save_commits {
1344    my ($cmd) = @_;
1345    my @lines = ();
1346    my @commits = ();
1347
1348    @lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
1349
1350    foreach my $line (@lines) {
1351	if ($line =~ m/$VCS_cmds{"blame_commit_pattern"}/) {
1352	    push(@commits, $1);
1353	}
1354    }
1355
1356    return @commits;
1357}
1358
1359sub vcs_blame {
1360    my ($file) = @_;
1361    my $cmd;
1362    my @commits = ();
1363
1364    return @commits if (!(-f $file));
1365
1366    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
1367	my @all_commits = ();
1368
1369	$cmd = $VCS_cmds{"blame_file_cmd"};
1370	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1371	@all_commits = vcs_save_commits($cmd);
1372
1373	foreach my $file_range_diff (@range) {
1374	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1375	    my $diff_file = $1;
1376	    my $diff_start = $2;
1377	    my $diff_length = $3;
1378	    next if ("$file" ne "$diff_file");
1379	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
1380		push(@commits, $all_commits[$i]);
1381	    }
1382	}
1383    } elsif (@range) {
1384	foreach my $file_range_diff (@range) {
1385	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
1386	    my $diff_file = $1;
1387	    my $diff_start = $2;
1388	    my $diff_length = $3;
1389	    next if ("$file" ne "$diff_file");
1390	    $cmd = $VCS_cmds{"blame_range_cmd"};
1391	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1392	    push(@commits, vcs_save_commits($cmd));
1393	}
1394    } else {
1395	$cmd = $VCS_cmds{"blame_file_cmd"};
1396	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
1397	@commits = vcs_save_commits($cmd);
1398    }
1399
1400    foreach my $commit (@commits) {
1401	$commit =~ s/^\^//g;
1402    }
1403
1404    return @commits;
1405}
1406
1407my $printed_novcs = 0;
1408sub vcs_exists {
1409    %VCS_cmds = %VCS_cmds_git;
1410    return 1 if eval $VCS_cmds{"available"};
1411    %VCS_cmds = %VCS_cmds_hg;
1412    return 2 if eval $VCS_cmds{"available"};
1413    %VCS_cmds = ();
1414    if (!$printed_novcs) {
1415	warn("$P: No supported VCS found.  Add --nogit to options?\n");
1416	warn("Using a git repository produces better results.\n");
1417	warn("Try Linus Torvalds' latest git repository using:\n");
1418	warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
1419	$printed_novcs = 1;
1420    }
1421    return 0;
1422}
1423
1424sub vcs_is_git {
1425    vcs_exists();
1426    return $vcs_used == 1;
1427}
1428
1429sub vcs_is_hg {
1430    return $vcs_used == 2;
1431}
1432
1433sub interactive_get_maintainers {
1434    my ($list_ref) = @_;
1435    my @list = @$list_ref;
1436
1437    vcs_exists();
1438
1439    my %selected;
1440    my %authored;
1441    my %signed;
1442    my $count = 0;
1443    my $maintained = 0;
1444    foreach my $entry (@list) {
1445	$maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
1446	$selected{$count} = 1;
1447	$authored{$count} = 0;
1448	$signed{$count} = 0;
1449	$count++;
1450    }
1451
1452    #menu loop
1453    my $done = 0;
1454    my $print_options = 0;
1455    my $redraw = 1;
1456    while (!$done) {
1457	$count = 0;
1458	if ($redraw) {
1459	    printf STDERR "\n%1s %2s %-65s",
1460			  "*", "#", "email/list and role:stats";
1461	    if ($email_git ||
1462		($email_git_fallback && !$maintained) ||
1463		$email_git_blame) {
1464		print STDERR "auth sign";
1465	    }
1466	    print STDERR "\n";
1467	    foreach my $entry (@list) {
1468		my $email = $entry->[0];
1469		my $role = $entry->[1];
1470		my $sel = "";
1471		$sel = "*" if ($selected{$count});
1472		my $commit_author = $commit_author_hash{$email};
1473		my $commit_signer = $commit_signer_hash{$email};
1474		my $authored = 0;
1475		my $signed = 0;
1476		$authored++ for (@{$commit_author});
1477		$signed++ for (@{$commit_signer});
1478		printf STDERR "%1s %2d %-65s", $sel, $count + 1, $email;
1479		printf STDERR "%4d %4d", $authored, $signed
1480		    if ($authored > 0 || $signed > 0);
1481		printf STDERR "\n     %s\n", $role;
1482		if ($authored{$count}) {
1483		    my $commit_author = $commit_author_hash{$email};
1484		    foreach my $ref (@{$commit_author}) {
1485			print STDERR "     Author: @{$ref}[1]\n";
1486		    }
1487		}
1488		if ($signed{$count}) {
1489		    my $commit_signer = $commit_signer_hash{$email};
1490		    foreach my $ref (@{$commit_signer}) {
1491			print STDERR "     @{$ref}[2]: @{$ref}[1]\n";
1492		    }
1493		}
1494
1495		$count++;
1496	    }
1497	}
1498	my $date_ref = \$email_git_since;
1499	$date_ref = \$email_hg_since if (vcs_is_hg());
1500	if ($print_options) {
1501	    $print_options = 0;
1502	    if (vcs_exists()) {
1503		print STDERR <<EOT
1504
1505Version Control options:
1506g  use git history      [$email_git]
1507gf use git-fallback     [$email_git_fallback]
1508b  use git blame        [$email_git_blame]
1509bs use blame signatures [$email_git_blame_signatures]
1510c# minimum commits      [$email_git_min_signatures]
1511%# min percent          [$email_git_min_percent]
1512d# history to use       [$$date_ref]
1513x# max maintainers      [$email_git_max_maintainers]
1514t  all signature types  [$email_git_all_signature_types]
1515m  use .mailmap         [$email_use_mailmap]
1516EOT
1517	    }
1518	    print STDERR <<EOT
1519
1520Additional options:
15210  toggle all
1522tm toggle maintainers
1523tg toggle git entries
1524tl toggle open list entries
1525ts toggle subscriber list entries
1526f  emails in file       [$file_emails]
1527k  keywords in file     [$keywords]
1528r  remove duplicates    [$email_remove_duplicates]
1529p# pattern match depth  [$pattern_depth]
1530EOT
1531	}
1532	print STDERR
1533"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
1534
1535	my $input = <STDIN>;
1536	chomp($input);
1537
1538	$redraw = 1;
1539	my $rerun = 0;
1540	my @wish = split(/[, ]+/, $input);
1541	foreach my $nr (@wish) {
1542	    $nr = lc($nr);
1543	    my $sel = substr($nr, 0, 1);
1544	    my $str = substr($nr, 1);
1545	    my $val = 0;
1546	    $val = $1 if $str =~ /^(\d+)$/;
1547
1548	    if ($sel eq "y") {
1549		$interactive = 0;
1550		$done = 1;
1551		$output_rolestats = 0;
1552		$output_roles = 0;
1553		last;
1554	    } elsif ($nr =~ /^\d+$/ && $nr > 0 && $nr <= $count) {
1555		$selected{$nr - 1} = !$selected{$nr - 1};
1556	    } elsif ($sel eq "*" || $sel eq '^') {
1557		my $toggle = 0;
1558		$toggle = 1 if ($sel eq '*');
1559		for (my $i = 0; $i < $count; $i++) {
1560		    $selected{$i} = $toggle;
1561		}
1562	    } elsif ($sel eq "0") {
1563		for (my $i = 0; $i < $count; $i++) {
1564		    $selected{$i} = !$selected{$i};
1565		}
1566	    } elsif ($sel eq "t") {
1567		if (lc($str) eq "m") {
1568		    for (my $i = 0; $i < $count; $i++) {
1569			$selected{$i} = !$selected{$i}
1570			    if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
1571		    }
1572		} elsif (lc($str) eq "g") {
1573		    for (my $i = 0; $i < $count; $i++) {
1574			$selected{$i} = !$selected{$i}
1575			    if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
1576		    }
1577		} elsif (lc($str) eq "l") {
1578		    for (my $i = 0; $i < $count; $i++) {
1579			$selected{$i} = !$selected{$i}
1580			    if ($list[$i]->[1] =~ /^(open list)/i);
1581		    }
1582		} elsif (lc($str) eq "s") {
1583		    for (my $i = 0; $i < $count; $i++) {
1584			$selected{$i} = !$selected{$i}
1585			    if ($list[$i]->[1] =~ /^(subscriber list)/i);
1586		    }
1587		}
1588	    } elsif ($sel eq "a") {
1589		if ($val > 0 && $val <= $count) {
1590		    $authored{$val - 1} = !$authored{$val - 1};
1591		} elsif ($str eq '*' || $str eq '^') {
1592		    my $toggle = 0;
1593		    $toggle = 1 if ($str eq '*');
1594		    for (my $i = 0; $i < $count; $i++) {
1595			$authored{$i} = $toggle;
1596		    }
1597		}
1598	    } elsif ($sel eq "s") {
1599		if ($val > 0 && $val <= $count) {
1600		    $signed{$val - 1} = !$signed{$val - 1};
1601		} elsif ($str eq '*' || $str eq '^') {
1602		    my $toggle = 0;
1603		    $toggle = 1 if ($str eq '*');
1604		    for (my $i = 0; $i < $count; $i++) {
1605			$signed{$i} = $toggle;
1606		    }
1607		}
1608	    } elsif ($sel eq "o") {
1609		$print_options = 1;
1610		$redraw = 1;
1611	    } elsif ($sel eq "g") {
1612		if ($str eq "f") {
1613		    bool_invert(\$email_git_fallback);
1614		} else {
1615		    bool_invert(\$email_git);
1616		}
1617		$rerun = 1;
1618	    } elsif ($sel eq "b") {
1619		if ($str eq "s") {
1620		    bool_invert(\$email_git_blame_signatures);
1621		} else {
1622		    bool_invert(\$email_git_blame);
1623		}
1624		$rerun = 1;
1625	    } elsif ($sel eq "c") {
1626		if ($val > 0) {
1627		    $email_git_min_signatures = $val;
1628		    $rerun = 1;
1629		}
1630	    } elsif ($sel eq "x") {
1631		if ($val > 0) {
1632		    $email_git_max_maintainers = $val;
1633		    $rerun = 1;
1634		}
1635	    } elsif ($sel eq "%") {
1636		if ($str ne "" && $val >= 0) {
1637		    $email_git_min_percent = $val;
1638		    $rerun = 1;
1639		}
1640	    } elsif ($sel eq "d") {
1641		if (vcs_is_git()) {
1642		    $email_git_since = $str;
1643		} elsif (vcs_is_hg()) {
1644		    $email_hg_since = $str;
1645		}
1646		$rerun = 1;
1647	    } elsif ($sel eq "t") {
1648		bool_invert(\$email_git_all_signature_types);
1649		$rerun = 1;
1650	    } elsif ($sel eq "f") {
1651		bool_invert(\$file_emails);
1652		$rerun = 1;
1653	    } elsif ($sel eq "r") {
1654		bool_invert(\$email_remove_duplicates);
1655		$rerun = 1;
1656	    } elsif ($sel eq "m") {
1657		bool_invert(\$email_use_mailmap);
1658		read_mailmap();
1659		$rerun = 1;
1660	    } elsif ($sel eq "k") {
1661		bool_invert(\$keywords);
1662		$rerun = 1;
1663	    } elsif ($sel eq "p") {
1664		if ($str ne "" && $val >= 0) {
1665		    $pattern_depth = $val;
1666		    $rerun = 1;
1667		}
1668	    } elsif ($sel eq "h" || $sel eq "?") {
1669		print STDERR <<EOT
1670
1671Interactive mode allows you to select the various maintainers, submitters,
1672commit signers and mailing lists that could be CC'd on a patch.
1673
1674Any *'d entry is selected.
1675
1676If you have git or hg installed, you can choose to summarize the commit
1677history of files in the patch.  Also, each line of the current file can
1678be matched to its commit author and that commits signers with blame.
1679
1680Various knobs exist to control the length of time for active commit
1681tracking, the maximum number of commit authors and signers to add,
1682and such.
1683
1684Enter selections at the prompt until you are satisfied that the selected
1685maintainers are appropriate.  You may enter multiple selections separated
1686by either commas or spaces.
1687
1688EOT
1689	    } else {
1690		print STDERR "invalid option: '$nr'\n";
1691		$redraw = 0;
1692	    }
1693	}
1694	if ($rerun) {
1695	    print STDERR "git-blame can be very slow, please have patience..."
1696		if ($email_git_blame);
1697	    goto &get_maintainers;
1698	}
1699    }
1700
1701    #drop not selected entries
1702    $count = 0;
1703    my @new_emailto = ();
1704    foreach my $entry (@list) {
1705	if ($selected{$count}) {
1706	    push(@new_emailto, $list[$count]);
1707	}
1708	$count++;
1709    }
1710    return @new_emailto;
1711}
1712
1713sub bool_invert {
1714    my ($bool_ref) = @_;
1715
1716    if ($$bool_ref) {
1717	$$bool_ref = 0;
1718    } else {
1719	$$bool_ref = 1;
1720    }
1721}
1722
1723sub deduplicate_email {
1724    my ($email) = @_;
1725
1726    my $matched = 0;
1727    my ($name, $address) = parse_email($email);
1728    $email = format_email($name, $address, 1);
1729    $email = mailmap_email($email);
1730
1731    return $email if (!$email_remove_duplicates);
1732
1733    ($name, $address) = parse_email($email);
1734
1735    if ($name ne "" && $deduplicate_name_hash{lc($name)}) {
1736	$name = $deduplicate_name_hash{lc($name)}->[0];
1737	$address = $deduplicate_name_hash{lc($name)}->[1];
1738	$matched = 1;
1739    } elsif ($deduplicate_address_hash{lc($address)}) {
1740	$name = $deduplicate_address_hash{lc($address)}->[0];
1741	$address = $deduplicate_address_hash{lc($address)}->[1];
1742	$matched = 1;
1743    }
1744    if (!$matched) {
1745	$deduplicate_name_hash{lc($name)} = [ $name, $address ];
1746	$deduplicate_address_hash{lc($address)} = [ $name, $address ];
1747    }
1748    $email = format_email($name, $address, 1);
1749    $email = mailmap_email($email);
1750    return $email;
1751}
1752
1753sub save_commits_by_author {
1754    my (@lines) = @_;
1755
1756    my @authors = ();
1757    my @commits = ();
1758    my @subjects = ();
1759
1760    foreach my $line (@lines) {
1761	if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
1762	    my $author = $1;
1763	    $author = deduplicate_email($author);
1764	    push(@authors, $author);
1765	}
1766	push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
1767	push(@subjects, $1) if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
1768    }
1769
1770    for (my $i = 0; $i < @authors; $i++) {
1771	my $exists = 0;
1772	foreach my $ref(@{$commit_author_hash{$authors[$i]}}) {
1773	    if (@{$ref}[0] eq $commits[$i] &&
1774		@{$ref}[1] eq $subjects[$i]) {
1775		$exists = 1;
1776		last;
1777	    }
1778	}
1779	if (!$exists) {
1780	    push(@{$commit_author_hash{$authors[$i]}},
1781		 [ ($commits[$i], $subjects[$i]) ]);
1782	}
1783    }
1784}
1785
1786sub save_commits_by_signer {
1787    my (@lines) = @_;
1788
1789    my $commit = "";
1790    my $subject = "";
1791
1792    foreach my $line (@lines) {
1793	$commit = $1 if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
1794	$subject = $1 if ($line =~ m/$VCS_cmds{"subject_pattern"}/);
1795	if ($line =~ /^[ \t]*${signature_pattern}.*\@.*$/) {
1796	    my @signatures = ($line);
1797	    my ($types_ref, $signers_ref) = extract_formatted_signatures(@signatures);
1798	    my @types = @$types_ref;
1799	    my @signers = @$signers_ref;
1800
1801	    my $type = $types[0];
1802	    my $signer = $signers[0];
1803
1804	    $signer = deduplicate_email($signer);
1805
1806	    my $exists = 0;
1807	    foreach my $ref(@{$commit_signer_hash{$signer}}) {
1808		if (@{$ref}[0] eq $commit &&
1809		    @{$ref}[1] eq $subject &&
1810		    @{$ref}[2] eq $type) {
1811		    $exists = 1;
1812		    last;
1813		}
1814	    }
1815	    if (!$exists) {
1816		push(@{$commit_signer_hash{$signer}},
1817		     [ ($commit, $subject, $type) ]);
1818	    }
1819	}
1820    }
1821}
1822
1823sub vcs_assign {
1824    my ($role, $divisor, @lines) = @_;
1825
1826    my %hash;
1827    my $count = 0;
1828
1829    return if (@lines <= 0);
1830
1831    if ($divisor <= 0) {
1832	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
1833	$divisor = 1;
1834    }
1835
1836    @lines = mailmap(@lines);
1837
1838    return if (@lines <= 0);
1839
1840    @lines = sort(@lines);
1841
1842    # uniq -c
1843    $hash{$_}++ for @lines;
1844
1845    # sort -rn
1846    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
1847	my $sign_offs = $hash{$line};
1848	my $percent = $sign_offs * 100 / $divisor;
1849
1850	$percent = 100 if ($percent > 100);
1851	$count++;
1852	last if ($sign_offs < $email_git_min_signatures ||
1853		 $count > $email_git_max_maintainers ||
1854		 $percent < $email_git_min_percent);
1855	push_email_address($line, '');
1856	if ($output_rolestats) {
1857	    my $fmt_percent = sprintf("%.0f", $percent);
1858	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
1859	} else {
1860	    add_role($line, $role);
1861	}
1862    }
1863}
1864
1865sub vcs_file_signoffs {
1866    my ($file) = @_;
1867
1868    my $authors_ref;
1869    my $signers_ref;
1870    my $stats_ref;
1871    my @authors = ();
1872    my @signers = ();
1873    my @stats = ();
1874    my $commits;
1875
1876    $vcs_used = vcs_exists();
1877    return if (!$vcs_used);
1878
1879    my $cmd = $VCS_cmds{"find_signers_cmd"};
1880    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
1881
1882    ($commits, $signers_ref, $authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
1883
1884    @signers = @{$signers_ref} if defined $signers_ref;
1885    @authors = @{$authors_ref} if defined $authors_ref;
1886    @stats = @{$stats_ref} if defined $stats_ref;
1887
1888#    print("commits: <$commits>\nsigners:<@signers>\nauthors: <@authors>\nstats: <@stats>\n");
1889
1890    foreach my $signer (@signers) {
1891	$signer = deduplicate_email($signer);
1892    }
1893
1894    vcs_assign("commit_signer", $commits, @signers);
1895    vcs_assign("authored", $commits, @authors);
1896    if ($#authors == $#stats) {
1897	my $stat_pattern = $VCS_cmds{"stat_pattern"};
1898	$stat_pattern =~ s/(\$\w+)/$1/eeg;	#interpolate $stat_pattern
1899
1900	my $added = 0;
1901	my $deleted = 0;
1902	for (my $i = 0; $i <= $#stats; $i++) {
1903	    if ($stats[$i] =~ /$stat_pattern/) {
1904		$added += $1;
1905		$deleted += $2;
1906	    }
1907	}
1908	my @tmp_authors = uniq(@authors);
1909	foreach my $author (@tmp_authors) {
1910	    $author = deduplicate_email($author);
1911	}
1912	@tmp_authors = uniq(@tmp_authors);
1913	my @list_added = ();
1914	my @list_deleted = ();
1915	foreach my $author (@tmp_authors) {
1916	    my $auth_added = 0;
1917	    my $auth_deleted = 0;
1918	    for (my $i = 0; $i <= $#stats; $i++) {
1919		if ($author eq deduplicate_email($authors[$i]) &&
1920		    $stats[$i] =~ /$stat_pattern/) {
1921		    $auth_added += $1;
1922		    $auth_deleted += $2;
1923		}
1924	    }
1925	    for (my $i = 0; $i < $auth_added; $i++) {
1926		push(@list_added, $author);
1927	    }
1928	    for (my $i = 0; $i < $auth_deleted; $i++) {
1929		push(@list_deleted, $author);
1930	    }
1931	}
1932	vcs_assign("added_lines", $added, @list_added);
1933	vcs_assign("removed_lines", $deleted, @list_deleted);
1934    }
1935}
1936
1937sub vcs_file_blame {
1938    my ($file) = @_;
1939
1940    my @signers = ();
1941    my @all_commits = ();
1942    my @commits = ();
1943    my $total_commits;
1944    my $total_lines;
1945
1946    $vcs_used = vcs_exists();
1947    return if (!$vcs_used);
1948
1949    @all_commits = vcs_blame($file);
1950    @commits = uniq(@all_commits);
1951    $total_commits = @commits;
1952    $total_lines = @all_commits;
1953
1954    if ($email_git_blame_signatures) {
1955	if (vcs_is_hg()) {
1956	    my $commit_count;
1957	    my $commit_authors_ref;
1958	    my $commit_signers_ref;
1959	    my $stats_ref;
1960	    my @commit_authors = ();
1961	    my @commit_signers = ();
1962	    my $commit = join(" -r ", @commits);
1963	    my $cmd;
1964
1965	    $cmd = $VCS_cmds{"find_commit_signers_cmd"};
1966	    $cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1967
1968	    ($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
1969	    @commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
1970	    @commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
1971
1972	    push(@signers, @commit_signers);
1973	} else {
1974	    foreach my $commit (@commits) {
1975		my $commit_count;
1976		my $commit_authors_ref;
1977		my $commit_signers_ref;
1978		my $stats_ref;
1979		my @commit_authors = ();
1980		my @commit_signers = ();
1981		my $cmd;
1982
1983		$cmd = $VCS_cmds{"find_commit_signers_cmd"};
1984		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
1985
1986		($commit_count, $commit_signers_ref, $commit_authors_ref, $stats_ref) = vcs_find_signers($cmd, $file);
1987		@commit_authors = @{$commit_authors_ref} if defined $commit_authors_ref;
1988		@commit_signers = @{$commit_signers_ref} if defined $commit_signers_ref;
1989
1990		push(@signers, @commit_signers);
1991	    }
1992	}
1993    }
1994
1995    if ($from_filename) {
1996	if ($output_rolestats) {
1997	    my @blame_signers;
1998	    if (vcs_is_hg()) {{		# Double brace for last exit
1999		my $commit_count;
2000		my @commit_signers = ();
2001		@commits = uniq(@commits);
2002		@commits = sort(@commits);
2003		my $commit = join(" -r ", @commits);
2004		my $cmd;
2005
2006		$cmd = $VCS_cmds{"find_commit_author_cmd"};
2007		$cmd =~ s/(\$\w+)/$1/eeg;	#substitute variables in $cmd
2008
2009		my @lines = ();
2010
2011		@lines = &{$VCS_cmds{"execute_cmd"}}($cmd);
2012
2013		if (!$email_git_penguin_chiefs) {
2014		    @lines = grep(!/${penguin_chiefs}/i, @lines);
2015		}
2016
2017		last if !@lines;
2018
2019		my @authors = ();
2020		foreach my $line (@lines) {
2021		    if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
2022			my $author = $1;
2023			$author = deduplicate_email($author);
2024			push(@authors, $author);
2025		    }
2026		}
2027
2028		save_commits_by_author(@lines) if ($interactive);
2029		save_commits_by_signer(@lines) if ($interactive);
2030
2031		push(@signers, @authors);
2032	    }}
2033	    else {
2034		foreach my $commit (@commits) {
2035		    my $i;
2036		    my $cmd = $VCS_cmds{"find_commit_author_cmd"};
2037		    $cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
2038		    my @author = vcs_find_author($cmd);
2039		    next if !@author;
2040
2041		    my $formatted_author = deduplicate_email($author[0]);
2042
2043		    my $count = grep(/$commit/, @all_commits);
2044		    for ($i = 0; $i < $count ; $i++) {
2045			push(@blame_signers, $formatted_author);
2046		    }
2047		}
2048	    }
2049	    if (@blame_signers) {
2050		vcs_assign("authored lines", $total_lines, @blame_signers);
2051	    }
2052	}
2053	foreach my $signer (@signers) {
2054	    $signer = deduplicate_email($signer);
2055	}
2056	vcs_assign("commits", $total_commits, @signers);
2057    } else {
2058	foreach my $signer (@signers) {
2059	    $signer = deduplicate_email($signer);
2060	}
2061	vcs_assign("modified commits", $total_commits, @signers);
2062    }
2063}
2064
2065sub uniq {
2066    my (@parms) = @_;
2067
2068    my %saw;
2069    @parms = grep(!$saw{$_}++, @parms);
2070    return @parms;
2071}
2072
2073sub sort_and_uniq {
2074    my (@parms) = @_;
2075
2076    my %saw;
2077    @parms = sort @parms;
2078    @parms = grep(!$saw{$_}++, @parms);
2079    return @parms;
2080}
2081
2082sub clean_file_emails {
2083    my (@file_emails) = @_;
2084    my @fmt_emails = ();
2085
2086    foreach my $email (@file_emails) {
2087	$email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
2088	my ($name, $address) = parse_email($email);
2089	if ($name eq '"[,\.]"') {
2090	    $name = "";
2091	}
2092
2093	my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
2094	if (@nw > 2) {
2095	    my $first = $nw[@nw - 3];
2096	    my $middle = $nw[@nw - 2];
2097	    my $last = $nw[@nw - 1];
2098
2099	    if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
2100		 (length($first) == 2 && substr($first, -1) eq ".")) ||
2101		(length($middle) == 1 ||
2102		 (length($middle) == 2 && substr($middle, -1) eq "."))) {
2103		$name = "$first $middle $last";
2104	    } else {
2105		$name = "$middle $last";
2106	    }
2107	}
2108
2109	if (substr($name, -1) =~ /[,\.]/) {
2110	    $name = substr($name, 0, length($name) - 1);
2111	} elsif (substr($name, -2) =~ /[,\.]"/) {
2112	    $name = substr($name, 0, length($name) - 2) . '"';
2113	}
2114
2115	if (substr($name, 0, 1) =~ /[,\.]/) {
2116	    $name = substr($name, 1, length($name) - 1);
2117	} elsif (substr($name, 0, 2) =~ /"[,\.]/) {
2118	    $name = '"' . substr($name, 2, length($name) - 2);
2119	}
2120
2121	my $fmt_email = format_email($name, $address, $email_usename);
2122	push(@fmt_emails, $fmt_email);
2123    }
2124    return @fmt_emails;
2125}
2126
2127sub merge_email {
2128    my @lines;
2129    my %saw;
2130
2131    for (@_) {
2132	my ($address, $role) = @$_;
2133	if (!$saw{$address}) {
2134	    if ($output_roles) {
2135		push(@lines, "$address ($role)");
2136	    } else {
2137		push(@lines, $address);
2138	    }
2139	    $saw{$address} = 1;
2140	}
2141    }
2142
2143    return @lines;
2144}
2145
2146sub output {
2147    my (@parms) = @_;
2148
2149    if ($output_multiline) {
2150	foreach my $line (@parms) {
2151	    print("${line}\n");
2152	}
2153    } else {
2154	print(join($output_separator, @parms));
2155	print("\n");
2156    }
2157}
2158
2159my $rfc822re;
2160
2161sub make_rfc822re {
2162#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
2163#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
2164#   This regexp will only work on addresses which have had comments stripped
2165#   and replaced with rfc822_lwsp.
2166
2167    my $specials = '()<>@,;:\\\\".\\[\\]';
2168    my $controls = '\\000-\\037\\177';
2169
2170    my $dtext = "[^\\[\\]\\r\\\\]";
2171    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
2172
2173    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
2174
2175#   Use zero-width assertion to spot the limit of an atom.  A simple
2176#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
2177    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
2178    my $word = "(?:$atom|$quoted_string)";
2179    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
2180
2181    my $sub_domain = "(?:$atom|$domain_literal)";
2182    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
2183
2184    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
2185
2186    my $phrase = "$word*";
2187    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
2188    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
2189    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
2190
2191    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
2192    my $address = "(?:$mailbox|$group)";
2193
2194    return "$rfc822_lwsp*$address";
2195}
2196
2197sub rfc822_strip_comments {
2198    my $s = shift;
2199#   Recursively remove comments, and replace with a single space.  The simpler
2200#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
2201#   chars in atoms, for example.
2202
2203    while ($s =~ s/^((?:[^"\\]|\\.)*
2204                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
2205                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
2206    return $s;
2207}
2208
2209#   valid: returns true if the parameter is an RFC822 valid address
2210#
2211sub rfc822_valid {
2212    my $s = rfc822_strip_comments(shift);
2213
2214    if (!$rfc822re) {
2215        $rfc822re = make_rfc822re();
2216    }
2217
2218    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
2219}
2220
2221#   validlist: In scalar context, returns true if the parameter is an RFC822
2222#              valid list of addresses.
2223#
2224#              In list context, returns an empty list on failure (an invalid
2225#              address was found); otherwise a list whose first element is the
2226#              number of addresses found and whose remaining elements are the
2227#              addresses.  This is needed to disambiguate failure (invalid)
2228#              from success with no addresses found, because an empty string is
2229#              a valid list.
2230
2231sub rfc822_validlist {
2232    my $s = rfc822_strip_comments(shift);
2233
2234    if (!$rfc822re) {
2235        $rfc822re = make_rfc822re();
2236    }
2237    # * null list items are valid according to the RFC
2238    # * the '1' business is to aid in distinguishing failure from no results
2239
2240    my @r;
2241    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
2242	$s =~ m/^$rfc822_char*$/) {
2243        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
2244            push(@r, $1);
2245        }
2246        return wantarray ? (scalar(@r), @r) : 1;
2247    }
2248    return wantarray ? () : 0;
2249}
2250