xref: /openbmc/linux/scripts/get_maintainer.pl (revision 4bdf0bb7)
1#!/usr/bin/perl -w
2# (c) 2007, Joe Perches <joe@perches.com>
3#           created from checkpatch.pl
4#
5# Print selected MAINTAINERS information for
6# the files modified in a patch or for a file
7#
8# usage: perl scripts/get_maintainers.pl [OPTIONS] <patch>
9#        perl scripts/get_maintainers.pl [OPTIONS] -f <file>
10#
11# Licensed under the terms of the GNU GPL License version 2
12
13use strict;
14
15my $P = $0;
16my $V = '0.20';
17
18use Getopt::Long qw(:config no_auto_abbrev);
19
20my $lk_path = "./";
21my $email = 1;
22my $email_usename = 1;
23my $email_maintainer = 1;
24my $email_list = 1;
25my $email_subscriber_list = 0;
26my $email_git = 1;
27my $email_git_penguin_chiefs = 0;
28my $email_git_min_signatures = 1;
29my $email_git_max_maintainers = 5;
30my $email_git_min_percent = 5;
31my $email_git_since = "1-year-ago";
32my $email_git_blame = 0;
33my $email_remove_duplicates = 1;
34my $output_multiline = 1;
35my $output_separator = ", ";
36my $scm = 0;
37my $web = 0;
38my $subsystem = 0;
39my $status = 0;
40my $from_filename = 0;
41my $pattern_depth = 0;
42my $version = 0;
43my $help = 0;
44
45my $exit = 0;
46
47my @penguin_chief = ();
48push(@penguin_chief,"Linus Torvalds:torvalds\@linux-foundation.org");
49#Andrew wants in on most everything - 2009/01/14
50#push(@penguin_chief,"Andrew Morton:akpm\@linux-foundation.org");
51
52my @penguin_chief_names = ();
53foreach my $chief (@penguin_chief) {
54    if ($chief =~ m/^(.*):(.*)/) {
55	my $chief_name = $1;
56	my $chief_addr = $2;
57	push(@penguin_chief_names, $chief_name);
58    }
59}
60my $penguin_chiefs = "\(" . join("|",@penguin_chief_names) . "\)";
61
62# rfc822 email address - preloaded methods go here.
63my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
64my $rfc822_char = '[\\000-\\377]';
65
66if (!GetOptions(
67		'email!' => \$email,
68		'git!' => \$email_git,
69		'git-chief-penguins!' => \$email_git_penguin_chiefs,
70		'git-min-signatures=i' => \$email_git_min_signatures,
71		'git-max-maintainers=i' => \$email_git_max_maintainers,
72		'git-min-percent=i' => \$email_git_min_percent,
73		'git-since=s' => \$email_git_since,
74		'git-blame!' => \$email_git_blame,
75		'remove-duplicates!' => \$email_remove_duplicates,
76		'm!' => \$email_maintainer,
77		'n!' => \$email_usename,
78		'l!' => \$email_list,
79		's!' => \$email_subscriber_list,
80		'multiline!' => \$output_multiline,
81		'separator=s' => \$output_separator,
82		'subsystem!' => \$subsystem,
83		'status!' => \$status,
84		'scm!' => \$scm,
85		'web!' => \$web,
86		'pattern-depth=i' => \$pattern_depth,
87		'f|file' => \$from_filename,
88		'v|version' => \$version,
89		'h|help' => \$help,
90		)) {
91    usage();
92    die "$P: invalid argument\n";
93}
94
95if ($help != 0) {
96    usage();
97    exit 0;
98}
99
100if ($version != 0) {
101    print("${P} ${V}\n");
102    exit 0;
103}
104
105if ($#ARGV < 0) {
106    usage();
107    die "$P: argument missing: patchfile or -f file please\n";
108}
109
110if ($output_separator ne ", ") {
111    $output_multiline = 0;
112}
113
114my $selections = $email + $scm + $status + $subsystem + $web;
115if ($selections == 0) {
116    usage();
117    die "$P:  Missing required option: email, scm, status, subsystem or web\n";
118}
119
120if ($email &&
121    ($email_maintainer + $email_list + $email_subscriber_list +
122     $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
123    usage();
124    die "$P: Please select at least 1 email option\n";
125}
126
127if (!top_of_kernel_tree($lk_path)) {
128    die "$P: The current directory does not appear to be "
129	. "a linux kernel source tree.\n";
130}
131
132## Read MAINTAINERS for type/value pairs
133
134my @typevalue = ();
135open(MAINT, "<${lk_path}MAINTAINERS") || die "$P: Can't open MAINTAINERS\n";
136while (<MAINT>) {
137    my $line = $_;
138
139    if ($line =~ m/^(\C):\s*(.*)/) {
140	my $type = $1;
141	my $value = $2;
142
143	##Filename pattern matching
144	if ($type eq "F" || $type eq "X") {
145	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
146	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
147	    $value =~ s/\?/\./g;         ##Convert ? to .
148	    ##if pattern is a directory and it lacks a trailing slash, add one
149	    if ((-d $value)) {
150		$value =~ s@([^/])$@$1/@;
151	    }
152	}
153	push(@typevalue, "$type:$value");
154    } elsif (!/^(\s)*$/) {
155	$line =~ s/\n$//g;
156	push(@typevalue, $line);
157    }
158}
159close(MAINT);
160
161my %mailmap;
162
163if ($email_remove_duplicates) {
164    open(MAILMAP, "<${lk_path}.mailmap") || warn "$P: Can't open .mailmap\n";
165    while (<MAILMAP>) {
166	my $line = $_;
167
168	next if ($line =~ m/^\s*#/);
169	next if ($line =~ m/^\s*$/);
170
171	my ($name, $address) = parse_email($line);
172	$line = format_email($name, $address);
173
174	next if ($line =~ m/^\s*$/);
175
176	if (exists($mailmap{$name})) {
177	    my $obj = $mailmap{$name};
178	    push(@$obj, $address);
179	} else {
180	    my @arr = ($address);
181	    $mailmap{$name} = \@arr;
182	}
183    }
184    close(MAILMAP);
185}
186
187## use the filenames on the command line or find the filenames in the patchfiles
188
189my @files = ();
190my @range = ();
191
192foreach my $file (@ARGV) {
193    ##if $file is a directory and it lacks a trailing slash, add one
194    if ((-d $file)) {
195	$file =~ s@([^/])$@$1/@;
196    } elsif (!(-f $file)) {
197	die "$P: file '${file}' not found\n";
198    }
199    if ($from_filename) {
200	push(@files, $file);
201    } else {
202	my $file_cnt = @files;
203	my $lastfile;
204	open(PATCH, "<$file") or die "$P: Can't open ${file}\n";
205	while (<PATCH>) {
206	    if (m/^\+\+\+\s+(\S+)/) {
207		my $filename = $1;
208		$filename =~ s@^[^/]*/@@;
209		$filename =~ s@\n@@;
210		$lastfile = $filename;
211		push(@files, $filename);
212	    } elsif (m/^\@\@ -(\d+),(\d+)/) {
213		if ($email_git_blame) {
214		    push(@range, "$lastfile:$1:$2");
215		}
216	    }
217	}
218	close(PATCH);
219	if ($file_cnt == @files) {
220	    warn "$P: file '${file}' doesn't appear to be a patch.  "
221		. "Add -f to options?\n";
222	}
223	@files = sort_and_uniq(@files);
224    }
225}
226
227my @email_to = ();
228my @list_to = ();
229my @scm = ();
230my @web = ();
231my @subsystem = ();
232my @status = ();
233
234# Find responsible parties
235
236foreach my $file (@files) {
237
238#Do not match excluded file patterns
239
240    my $exclude = 0;
241    foreach my $line (@typevalue) {
242	if ($line =~ m/^(\C):\s*(.*)/) {
243	    my $type = $1;
244	    my $value = $2;
245	    if ($type eq 'X') {
246		if (file_match_pattern($file, $value)) {
247		    $exclude = 1;
248		    last;
249		}
250	    }
251	}
252    }
253
254    if (!$exclude) {
255	my $tvi = 0;
256	my %hash;
257	foreach my $line (@typevalue) {
258	    if ($line =~ m/^(\C):\s*(.*)/) {
259		my $type = $1;
260		my $value = $2;
261		if ($type eq 'F') {
262		    if (file_match_pattern($file, $value)) {
263			my $value_pd = ($value =~ tr@/@@);
264			my $file_pd = ($file  =~ tr@/@@);
265			$value_pd++ if (substr($value,-1,1) ne "/");
266			if ($pattern_depth == 0 ||
267			    (($file_pd - $value_pd) < $pattern_depth)) {
268			    $hash{$tvi} = $value_pd;
269			}
270		    }
271		}
272	    }
273	    $tvi++;
274	}
275	foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
276	    add_categories($line);
277	}
278    }
279
280    if ($email && $email_git) {
281	recent_git_signoffs($file);
282    }
283
284    if ($email && $email_git_blame) {
285	git_assign_blame($file);
286    }
287}
288
289if ($email) {
290    foreach my $chief (@penguin_chief) {
291	if ($chief =~ m/^(.*):(.*)/) {
292	    my $email_address;
293
294	    $email_address = format_email($1, $2);
295	    if ($email_git_penguin_chiefs) {
296		push(@email_to, $email_address);
297	    } else {
298		@email_to = grep(!/${email_address}/, @email_to);
299	    }
300	}
301    }
302}
303
304if ($email || $email_list) {
305    my @to = ();
306    if ($email) {
307	@to = (@to, @email_to);
308    }
309    if ($email_list) {
310	@to = (@to, @list_to);
311    }
312    output(uniq(@to));
313}
314
315if ($scm) {
316    @scm = uniq(@scm);
317    output(@scm);
318}
319
320if ($status) {
321    @status = uniq(@status);
322    output(@status);
323}
324
325if ($subsystem) {
326    @subsystem = uniq(@subsystem);
327    output(@subsystem);
328}
329
330if ($web) {
331    @web = uniq(@web);
332    output(@web);
333}
334
335exit($exit);
336
337sub file_match_pattern {
338    my ($file, $pattern) = @_;
339    if (substr($pattern, -1) eq "/") {
340	if ($file =~ m@^$pattern@) {
341	    return 1;
342	}
343    } else {
344	if ($file =~ m@^$pattern@) {
345	    my $s1 = ($file =~ tr@/@@);
346	    my $s2 = ($pattern =~ tr@/@@);
347	    if ($s1 == $s2) {
348		return 1;
349	    }
350	}
351    }
352    return 0;
353}
354
355sub usage {
356    print <<EOT;
357usage: $P [options] patchfile
358       $P [options] -f file|directory
359version: $V
360
361MAINTAINER field selection options:
362  --email => print email address(es) if any
363    --git => include recent git \*-by: signers
364    --git-chief-penguins => include ${penguin_chiefs}
365    --git-min-signatures => number of signatures required (default: 1)
366    --git-max-maintainers => maximum maintainers to add (default: 5)
367    --git-min-percent => minimum percentage of commits required (default: 5)
368    --git-since => git history to use (default: 1-year-ago)
369    --git-blame => use git blame to find modified commits for patch or file
370    --m => include maintainer(s) if any
371    --n => include name 'Full Name <addr\@domain.tld>'
372    --l => include list(s) if any
373    --s => include subscriber only list(s) if any
374    --remove-duplicates => minimize duplicate email names/addresses
375  --scm => print SCM tree(s) if any
376  --status => print status if any
377  --subsystem => print subsystem name if any
378  --web => print website(s) if any
379
380Output type options:
381  --separator [, ] => separator for multiple entries on 1 line
382    using --separator also sets --nomultiline if --separator is not [, ]
383  --multiline => print 1 entry per line
384
385Other options:
386  --pattern-depth => Number of pattern directory traversals (default: 0 (all))
387  --version => show version
388  --help => show this help information
389
390Default options:
391  [--email --git --m --n --l --multiline --pattern-depth=0 --remove-duplicates]
392
393Notes:
394  Using "-f directory" may give unexpected results:
395      Used with "--git", git signators for _all_ files in and below
396          directory are examined as git recurses directories.
397          Any specified X: (exclude) pattern matches are _not_ ignored.
398      Used with "--nogit", directory is used as a pattern match,
399         no individual file within the directory or subdirectory
400         is matched.
401      Used with "--git-blame", does not iterate all files in directory
402  Using "--git-blame" is slow and may add old committers and authors
403      that are no longer active maintainers to the output.
404EOT
405}
406
407sub top_of_kernel_tree {
408	my ($lk_path) = @_;
409
410	if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") {
411	    $lk_path .= "/";
412	}
413	if (   (-f "${lk_path}COPYING")
414	    && (-f "${lk_path}CREDITS")
415	    && (-f "${lk_path}Kbuild")
416	    && (-f "${lk_path}MAINTAINERS")
417	    && (-f "${lk_path}Makefile")
418	    && (-f "${lk_path}README")
419	    && (-d "${lk_path}Documentation")
420	    && (-d "${lk_path}arch")
421	    && (-d "${lk_path}include")
422	    && (-d "${lk_path}drivers")
423	    && (-d "${lk_path}fs")
424	    && (-d "${lk_path}init")
425	    && (-d "${lk_path}ipc")
426	    && (-d "${lk_path}kernel")
427	    && (-d "${lk_path}lib")
428	    && (-d "${lk_path}scripts")) {
429		return 1;
430	}
431	return 0;
432}
433
434sub parse_email {
435    my ($formatted_email) = @_;
436
437    my $name = "";
438    my $address = "";
439
440    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
441	$name = $1;
442	$address = $2;
443    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
444	$address = $1;
445    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
446	$address = $1;
447    }
448
449    $name =~ s/^\s+|\s+$//g;
450    $name =~ s/^\"|\"$//g;
451    $address =~ s/^\s+|\s+$//g;
452
453    if ($name =~ /[^a-z0-9 \.\-]/i) {    ##has "must quote" chars
454	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
455	$name = "\"$name\"";
456    }
457
458    return ($name, $address);
459}
460
461sub format_email {
462    my ($name, $address) = @_;
463
464    my $formatted_email;
465
466    $name =~ s/^\s+|\s+$//g;
467    $name =~ s/^\"|\"$//g;
468    $address =~ s/^\s+|\s+$//g;
469
470    if ($name =~ /[^a-z0-9 \.\-]/i) {    ##has "must quote" chars
471	$name =~ s/(?<!\\)"/\\"/g;       ##escape quotes
472	$name = "\"$name\"";
473    }
474
475    if ($email_usename) {
476	if ("$name" eq "") {
477	    $formatted_email = "$address";
478	} else {
479	    $formatted_email = "$name <${address}>";
480	}
481    } else {
482	$formatted_email = $address;
483    }
484
485    return $formatted_email;
486}
487
488sub find_starting_index {
489
490    my ($index) = @_;
491
492    while ($index > 0) {
493	my $tv = $typevalue[$index];
494	if (!($tv =~ m/^(\C):\s*(.*)/)) {
495	    last;
496	}
497	$index--;
498    }
499
500    return $index;
501}
502
503sub find_ending_index {
504    my ($index) = @_;
505
506    while ($index < @typevalue) {
507	my $tv = $typevalue[$index];
508	if (!($tv =~ m/^(\C):\s*(.*)/)) {
509	    last;
510	}
511	$index++;
512    }
513
514    return $index;
515}
516
517sub add_categories {
518    my ($index) = @_;
519
520    my $i;
521    my $start = find_starting_index($index);
522    my $end = find_ending_index($index);
523
524    push(@subsystem, $typevalue[$start]);
525
526    for ($i = $start + 1; $i < $end; $i++) {
527	my $tv = $typevalue[$i];
528	if ($tv =~ m/^(\C):\s*(.*)/) {
529	    my $ptype = $1;
530	    my $pvalue = $2;
531	    if ($ptype eq "L") {
532		my $list_address = $pvalue;
533		my $list_additional = "";
534		if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
535		    $list_address = $1;
536		    $list_additional = $2;
537		}
538		if ($list_additional =~ m/subscribers-only/) {
539		    if ($email_subscriber_list) {
540			push(@list_to, $list_address);
541		    }
542		} else {
543		    if ($email_list) {
544			push(@list_to, $list_address);
545		    }
546		}
547	    } elsif ($ptype eq "M") {
548		my ($name, $address) = parse_email($pvalue);
549		if ($name eq "") {
550		    if ($i > 0) {
551			my $tv = $typevalue[$i - 1];
552			if ($tv =~ m/^(\C):\s*(.*)/) {
553			    if ($1 eq "P") {
554				$name = $2;
555				$pvalue = format_email($name, $address);
556			    }
557			}
558		    }
559		}
560		if ($email_maintainer) {
561		    push_email_addresses($pvalue);
562		}
563	    } elsif ($ptype eq "T") {
564		push(@scm, $pvalue);
565	    } elsif ($ptype eq "W") {
566		push(@web, $pvalue);
567	    } elsif ($ptype eq "S") {
568		push(@status, $pvalue);
569	    }
570	}
571    }
572}
573
574my %email_hash_name;
575my %email_hash_address;
576
577sub email_inuse {
578    my ($name, $address) = @_;
579
580    return 1 if (($name eq "") && ($address eq ""));
581    return 1 if (($name ne "") && exists($email_hash_name{$name}));
582    return 1 if (($address ne "") && exists($email_hash_address{$address}));
583
584    return 0;
585}
586
587sub push_email_address {
588    my ($line) = @_;
589
590    my ($name, $address) = parse_email($line);
591
592    if ($address eq "") {
593	return 0;
594    }
595
596    if (!$email_remove_duplicates) {
597	push(@email_to, format_email($name, $address));
598    } elsif (!email_inuse($name, $address)) {
599	push(@email_to, format_email($name, $address));
600	$email_hash_name{$name}++;
601	$email_hash_address{$address}++;
602    }
603
604    return 1;
605}
606
607sub push_email_addresses {
608    my ($address) = @_;
609
610    my @address_list = ();
611
612    if (rfc822_valid($address)) {
613	push_email_address($address);
614    } elsif (@address_list = rfc822_validlist($address)) {
615	my $array_count = shift(@address_list);
616	while (my $entry = shift(@address_list)) {
617	    push_email_address($entry);
618	}
619    } else {
620	if (!push_email_address($address)) {
621	    warn("Invalid MAINTAINERS address: '" . $address . "'\n");
622	}
623    }
624}
625
626sub which {
627    my ($bin) = @_;
628
629    foreach my $path (split(/:/, $ENV{PATH})) {
630	if (-e "$path/$bin") {
631	    return "$path/$bin";
632	}
633    }
634
635    return "";
636}
637
638sub mailmap {
639    my @lines = @_;
640    my %hash;
641
642    foreach my $line (@lines) {
643	my ($name, $address) = parse_email($line);
644	if (!exists($hash{$name})) {
645	    $hash{$name} = $address;
646	} elsif ($address ne $hash{$name}) {
647	    $address = $hash{$name};
648	    $line = format_email($name, $address);
649	}
650	if (exists($mailmap{$name})) {
651	    my $obj = $mailmap{$name};
652	    foreach my $map_address (@$obj) {
653		if (($map_address eq $address) &&
654		    ($map_address ne $hash{$name})) {
655		    $line = format_email($name, $hash{$name});
656		}
657	    }
658	}
659    }
660
661    return @lines;
662}
663
664sub recent_git_signoffs {
665    my ($file) = @_;
666
667    my $sign_offs = "";
668    my $cmd = "";
669    my $output = "";
670    my $count = 0;
671    my @lines = ();
672    my %hash;
673    my $total_sign_offs;
674
675    if (which("git") eq "") {
676	warn("$P: git not found.  Add --nogit to options?\n");
677	return;
678    }
679    if (!(-d ".git")) {
680	warn("$P: .git directory not found.  Use a git repository for better results.\n");
681	warn("$P: perhaps 'git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git'\n");
682	return;
683    }
684
685    $cmd = "git log --since=${email_git_since} -- ${file}";
686
687    $output = `${cmd}`;
688    $output =~ s/^\s*//gm;
689
690    @lines = split("\n", $output);
691
692    @lines = grep(/^[-_ 	a-z]+by:.*\@.*$/i, @lines);
693    if (!$email_git_penguin_chiefs) {
694	@lines = grep(!/${penguin_chiefs}/i, @lines);
695    }
696    # cut -f2- -d":"
697    s/.*:\s*(.+)\s*/$1/ for (@lines);
698
699    $total_sign_offs = @lines;
700
701    if ($email_remove_duplicates) {
702	@lines = mailmap(@lines);
703    }
704
705    @lines = sort(@lines);
706
707    # uniq -c
708    $hash{$_}++ for @lines;
709
710    # sort -rn
711    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
712	my $sign_offs = $hash{$line};
713	$count++;
714	last if ($sign_offs < $email_git_min_signatures ||
715		 $count > $email_git_max_maintainers ||
716		 $sign_offs * 100 / $total_sign_offs < $email_git_min_percent);
717	push_email_address($line);
718    }
719}
720
721sub save_commits {
722    my ($cmd, @commits) = @_;
723    my $output;
724    my @lines = ();
725
726    $output = `${cmd}`;
727
728    @lines = split("\n", $output);
729    foreach my $line (@lines) {
730	if ($line =~ m/^(\w+) /) {
731	    push (@commits, $1);
732	}
733    }
734    return @commits;
735}
736
737sub git_assign_blame {
738    my ($file) = @_;
739
740    my @lines = ();
741    my @commits = ();
742    my $cmd;
743    my $output;
744    my %hash;
745    my $total_sign_offs;
746    my $count;
747
748    if (@range) {
749	foreach my $file_range_diff (@range) {
750	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
751	    my $diff_file = $1;
752	    my $diff_start = $2;
753	    my $diff_length = $3;
754	    next if (!("$file" eq "$diff_file"));
755	    $cmd = "git blame -l -L $diff_start,+$diff_length $file";
756	    @commits = save_commits($cmd, @commits);
757	}
758    } else {
759	if (-f $file) {
760	    $cmd = "git blame -l $file";
761	    @commits = save_commits($cmd, @commits);
762	}
763    }
764
765    $total_sign_offs = 0;
766    @commits = uniq(@commits);
767    foreach my $commit (@commits) {
768	$cmd = "git log -1 ${commit}";
769
770	$output = `${cmd}`;
771	$output =~ s/^\s*//gm;
772	@lines = split("\n", $output);
773
774	@lines = grep(/^[-_ 	a-z]+by:.*\@.*$/i, @lines);
775	if (!$email_git_penguin_chiefs) {
776	    @lines = grep(!/${penguin_chiefs}/i, @lines);
777	}
778
779	# cut -f2- -d":"
780	s/.*:\s*(.+)\s*/$1/ for (@lines);
781
782	$total_sign_offs += @lines;
783
784	if ($email_remove_duplicates) {
785	    @lines = mailmap(@lines);
786	}
787
788	$hash{$_}++ for @lines;
789    }
790
791    $count = 0;
792    foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
793	my $sign_offs = $hash{$line};
794	$count++;
795	last if ($sign_offs < $email_git_min_signatures ||
796		 $count > $email_git_max_maintainers ||
797		 $sign_offs * 100 / $total_sign_offs < $email_git_min_percent);
798	push_email_address($line);
799    }
800}
801
802sub uniq {
803    my @parms = @_;
804
805    my %saw;
806    @parms = grep(!$saw{$_}++, @parms);
807    return @parms;
808}
809
810sub sort_and_uniq {
811    my @parms = @_;
812
813    my %saw;
814    @parms = sort @parms;
815    @parms = grep(!$saw{$_}++, @parms);
816    return @parms;
817}
818
819sub output {
820    my @parms = @_;
821
822    if ($output_multiline) {
823	foreach my $line (@parms) {
824	    print("${line}\n");
825	}
826    } else {
827	print(join($output_separator, @parms));
828	print("\n");
829    }
830}
831
832my $rfc822re;
833
834sub make_rfc822re {
835#   Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
836#   comment.  We must allow for rfc822_lwsp (or comments) after each of these.
837#   This regexp will only work on addresses which have had comments stripped
838#   and replaced with rfc822_lwsp.
839
840    my $specials = '()<>@,;:\\\\".\\[\\]';
841    my $controls = '\\000-\\037\\177';
842
843    my $dtext = "[^\\[\\]\\r\\\\]";
844    my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
845
846    my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
847
848#   Use zero-width assertion to spot the limit of an atom.  A simple
849#   $rfc822_lwsp* causes the regexp engine to hang occasionally.
850    my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
851    my $word = "(?:$atom|$quoted_string)";
852    my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
853
854    my $sub_domain = "(?:$atom|$domain_literal)";
855    my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
856
857    my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
858
859    my $phrase = "$word*";
860    my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
861    my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
862    my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
863
864    my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
865    my $address = "(?:$mailbox|$group)";
866
867    return "$rfc822_lwsp*$address";
868}
869
870sub rfc822_strip_comments {
871    my $s = shift;
872#   Recursively remove comments, and replace with a single space.  The simpler
873#   regexps in the Email Addressing FAQ are imperfect - they will miss escaped
874#   chars in atoms, for example.
875
876    while ($s =~ s/^((?:[^"\\]|\\.)*
877                    (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
878                    \((?:[^()\\]|\\.)*\)/$1 /osx) {}
879    return $s;
880}
881
882#   valid: returns true if the parameter is an RFC822 valid address
883#
884sub rfc822_valid ($) {
885    my $s = rfc822_strip_comments(shift);
886
887    if (!$rfc822re) {
888        $rfc822re = make_rfc822re();
889    }
890
891    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
892}
893
894#   validlist: In scalar context, returns true if the parameter is an RFC822
895#              valid list of addresses.
896#
897#              In list context, returns an empty list on failure (an invalid
898#              address was found); otherwise a list whose first element is the
899#              number of addresses found and whose remaining elements are the
900#              addresses.  This is needed to disambiguate failure (invalid)
901#              from success with no addresses found, because an empty string is
902#              a valid list.
903
904sub rfc822_validlist ($) {
905    my $s = rfc822_strip_comments(shift);
906
907    if (!$rfc822re) {
908        $rfc822re = make_rfc822re();
909    }
910    # * null list items are valid according to the RFC
911    # * the '1' business is to aid in distinguishing failure from no results
912
913    my @r;
914    if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
915	$s =~ m/^$rfc822_char*$/) {
916        while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
917            push @r, $1;
918        }
919        return wantarray ? (scalar(@r), @r) : 1;
920    }
921    else {
922        return wantarray ? () : 0;
923    }
924}
925