1#!/usr/bin/perl -w 2# (c) 2007, Joe Perches <joe@perches.com> 3# created from checkpatch.pl 4# 5# Print selected MAINTAINERS information for 6# the files modified in a patch or for a file 7# 8# usage: perl scripts/get_maintainers.pl [OPTIONS] <patch> 9# perl scripts/get_maintainers.pl [OPTIONS] -f <file> 10# 11# Licensed under the terms of the GNU GPL License version 2 12 13use strict; 14 15my $P = $0; 16my $V = '0.20'; 17 18use Getopt::Long qw(:config no_auto_abbrev); 19 20my $lk_path = "./"; 21my $email = 1; 22my $email_usename = 1; 23my $email_maintainer = 1; 24my $email_list = 1; 25my $email_subscriber_list = 0; 26my $email_git = 1; 27my $email_git_penguin_chiefs = 0; 28my $email_git_min_signatures = 1; 29my $email_git_max_maintainers = 5; 30my $email_git_min_percent = 5; 31my $email_git_since = "1-year-ago"; 32my $email_git_blame = 0; 33my $email_remove_duplicates = 1; 34my $output_multiline = 1; 35my $output_separator = ", "; 36my $scm = 0; 37my $web = 0; 38my $subsystem = 0; 39my $status = 0; 40my $from_filename = 0; 41my $pattern_depth = 0; 42my $version = 0; 43my $help = 0; 44 45my $exit = 0; 46 47my @penguin_chief = (); 48push(@penguin_chief,"Linus Torvalds:torvalds\@linux-foundation.org"); 49#Andrew wants in on most everything - 2009/01/14 50#push(@penguin_chief,"Andrew Morton:akpm\@linux-foundation.org"); 51 52my @penguin_chief_names = (); 53foreach my $chief (@penguin_chief) { 54 if ($chief =~ m/^(.*):(.*)/) { 55 my $chief_name = $1; 56 my $chief_addr = $2; 57 push(@penguin_chief_names, $chief_name); 58 } 59} 60my $penguin_chiefs = "\(" . join("|",@penguin_chief_names) . "\)"; 61 62# rfc822 email address - preloaded methods go here. 63my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])"; 64my $rfc822_char = '[\\000-\\377]'; 65 66if (!GetOptions( 67 'email!' => \$email, 68 'git!' => \$email_git, 69 'git-chief-penguins!' => \$email_git_penguin_chiefs, 70 'git-min-signatures=i' => \$email_git_min_signatures, 71 'git-max-maintainers=i' => \$email_git_max_maintainers, 72 'git-min-percent=i' => \$email_git_min_percent, 73 'git-since=s' => \$email_git_since, 74 'git-blame!' => \$email_git_blame, 75 'remove-duplicates!' => \$email_remove_duplicates, 76 'm!' => \$email_maintainer, 77 'n!' => \$email_usename, 78 'l!' => \$email_list, 79 's!' => \$email_subscriber_list, 80 'multiline!' => \$output_multiline, 81 'separator=s' => \$output_separator, 82 'subsystem!' => \$subsystem, 83 'status!' => \$status, 84 'scm!' => \$scm, 85 'web!' => \$web, 86 'pattern-depth=i' => \$pattern_depth, 87 'f|file' => \$from_filename, 88 'v|version' => \$version, 89 'h|help' => \$help, 90 )) { 91 usage(); 92 die "$P: invalid argument\n"; 93} 94 95if ($help != 0) { 96 usage(); 97 exit 0; 98} 99 100if ($version != 0) { 101 print("${P} ${V}\n"); 102 exit 0; 103} 104 105if ($#ARGV < 0) { 106 usage(); 107 die "$P: argument missing: patchfile or -f file please\n"; 108} 109 110if ($output_separator ne ", ") { 111 $output_multiline = 0; 112} 113 114my $selections = $email + $scm + $status + $subsystem + $web; 115if ($selections == 0) { 116 usage(); 117 die "$P: Missing required option: email, scm, status, subsystem or web\n"; 118} 119 120if ($email && 121 ($email_maintainer + $email_list + $email_subscriber_list + 122 $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) { 123 usage(); 124 die "$P: Please select at least 1 email option\n"; 125} 126 127if (!top_of_kernel_tree($lk_path)) { 128 die "$P: The current directory does not appear to be " 129 . "a linux kernel source tree.\n"; 130} 131 132## Read MAINTAINERS for type/value pairs 133 134my @typevalue = (); 135open(MAINT, "<${lk_path}MAINTAINERS") || die "$P: Can't open MAINTAINERS\n"; 136while (<MAINT>) { 137 my $line = $_; 138 139 if ($line =~ m/^(\C):\s*(.*)/) { 140 my $type = $1; 141 my $value = $2; 142 143 ##Filename pattern matching 144 if ($type eq "F" || $type eq "X") { 145 $value =~ s@\.@\\\.@g; ##Convert . to \. 146 $value =~ s/\*/\.\*/g; ##Convert * to .* 147 $value =~ s/\?/\./g; ##Convert ? to . 148 ##if pattern is a directory and it lacks a trailing slash, add one 149 if ((-d $value)) { 150 $value =~ s@([^/])$@$1/@; 151 } 152 } 153 push(@typevalue, "$type:$value"); 154 } elsif (!/^(\s)*$/) { 155 $line =~ s/\n$//g; 156 push(@typevalue, $line); 157 } 158} 159close(MAINT); 160 161my %mailmap; 162 163if ($email_remove_duplicates) { 164 open(MAILMAP, "<${lk_path}.mailmap") || warn "$P: Can't open .mailmap\n"; 165 while (<MAILMAP>) { 166 my $line = $_; 167 168 next if ($line =~ m/^\s*#/); 169 next if ($line =~ m/^\s*$/); 170 171 my ($name, $address) = parse_email($line); 172 $line = format_email($name, $address); 173 174 next if ($line =~ m/^\s*$/); 175 176 if (exists($mailmap{$name})) { 177 my $obj = $mailmap{$name}; 178 push(@$obj, $address); 179 } else { 180 my @arr = ($address); 181 $mailmap{$name} = \@arr; 182 } 183 } 184 close(MAILMAP); 185} 186 187## use the filenames on the command line or find the filenames in the patchfiles 188 189my @files = (); 190my @range = (); 191 192foreach my $file (@ARGV) { 193 ##if $file is a directory and it lacks a trailing slash, add one 194 if ((-d $file)) { 195 $file =~ s@([^/])$@$1/@; 196 } elsif (!(-f $file)) { 197 die "$P: file '${file}' not found\n"; 198 } 199 if ($from_filename) { 200 push(@files, $file); 201 } else { 202 my $file_cnt = @files; 203 my $lastfile; 204 open(PATCH, "<$file") or die "$P: Can't open ${file}\n"; 205 while (<PATCH>) { 206 if (m/^\+\+\+\s+(\S+)/) { 207 my $filename = $1; 208 $filename =~ s@^[^/]*/@@; 209 $filename =~ s@\n@@; 210 $lastfile = $filename; 211 push(@files, $filename); 212 } elsif (m/^\@\@ -(\d+),(\d+)/) { 213 if ($email_git_blame) { 214 push(@range, "$lastfile:$1:$2"); 215 } 216 } 217 } 218 close(PATCH); 219 if ($file_cnt == @files) { 220 warn "$P: file '${file}' doesn't appear to be a patch. " 221 . "Add -f to options?\n"; 222 } 223 @files = sort_and_uniq(@files); 224 } 225} 226 227my @email_to = (); 228my @list_to = (); 229my @scm = (); 230my @web = (); 231my @subsystem = (); 232my @status = (); 233 234# Find responsible parties 235 236foreach my $file (@files) { 237 238#Do not match excluded file patterns 239 240 my $exclude = 0; 241 foreach my $line (@typevalue) { 242 if ($line =~ m/^(\C):\s*(.*)/) { 243 my $type = $1; 244 my $value = $2; 245 if ($type eq 'X') { 246 if (file_match_pattern($file, $value)) { 247 $exclude = 1; 248 last; 249 } 250 } 251 } 252 } 253 254 if (!$exclude) { 255 my $tvi = 0; 256 my %hash; 257 foreach my $line (@typevalue) { 258 if ($line =~ m/^(\C):\s*(.*)/) { 259 my $type = $1; 260 my $value = $2; 261 if ($type eq 'F') { 262 if (file_match_pattern($file, $value)) { 263 my $value_pd = ($value =~ tr@/@@); 264 my $file_pd = ($file =~ tr@/@@); 265 $value_pd++ if (substr($value,-1,1) ne "/"); 266 if ($pattern_depth == 0 || 267 (($file_pd - $value_pd) < $pattern_depth)) { 268 $hash{$tvi} = $value_pd; 269 } 270 } 271 } 272 } 273 $tvi++; 274 } 275 foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { 276 add_categories($line); 277 } 278 } 279 280 if ($email && $email_git) { 281 recent_git_signoffs($file); 282 } 283 284 if ($email && $email_git_blame) { 285 git_assign_blame($file); 286 } 287} 288 289if ($email) { 290 foreach my $chief (@penguin_chief) { 291 if ($chief =~ m/^(.*):(.*)/) { 292 my $email_address; 293 294 $email_address = format_email($1, $2); 295 if ($email_git_penguin_chiefs) { 296 push(@email_to, $email_address); 297 } else { 298 @email_to = grep(!/${email_address}/, @email_to); 299 } 300 } 301 } 302} 303 304if ($email || $email_list) { 305 my @to = (); 306 if ($email) { 307 @to = (@to, @email_to); 308 } 309 if ($email_list) { 310 @to = (@to, @list_to); 311 } 312 output(uniq(@to)); 313} 314 315if ($scm) { 316 @scm = uniq(@scm); 317 output(@scm); 318} 319 320if ($status) { 321 @status = uniq(@status); 322 output(@status); 323} 324 325if ($subsystem) { 326 @subsystem = uniq(@subsystem); 327 output(@subsystem); 328} 329 330if ($web) { 331 @web = uniq(@web); 332 output(@web); 333} 334 335exit($exit); 336 337sub file_match_pattern { 338 my ($file, $pattern) = @_; 339 if (substr($pattern, -1) eq "/") { 340 if ($file =~ m@^$pattern@) { 341 return 1; 342 } 343 } else { 344 if ($file =~ m@^$pattern@) { 345 my $s1 = ($file =~ tr@/@@); 346 my $s2 = ($pattern =~ tr@/@@); 347 if ($s1 == $s2) { 348 return 1; 349 } 350 } 351 } 352 return 0; 353} 354 355sub usage { 356 print <<EOT; 357usage: $P [options] patchfile 358 $P [options] -f file|directory 359version: $V 360 361MAINTAINER field selection options: 362 --email => print email address(es) if any 363 --git => include recent git \*-by: signers 364 --git-chief-penguins => include ${penguin_chiefs} 365 --git-min-signatures => number of signatures required (default: 1) 366 --git-max-maintainers => maximum maintainers to add (default: 5) 367 --git-min-percent => minimum percentage of commits required (default: 5) 368 --git-since => git history to use (default: 1-year-ago) 369 --git-blame => use git blame to find modified commits for patch or file 370 --m => include maintainer(s) if any 371 --n => include name 'Full Name <addr\@domain.tld>' 372 --l => include list(s) if any 373 --s => include subscriber only list(s) if any 374 --remove-duplicates => minimize duplicate email names/addresses 375 --scm => print SCM tree(s) if any 376 --status => print status if any 377 --subsystem => print subsystem name if any 378 --web => print website(s) if any 379 380Output type options: 381 --separator [, ] => separator for multiple entries on 1 line 382 using --separator also sets --nomultiline if --separator is not [, ] 383 --multiline => print 1 entry per line 384 385Other options: 386 --pattern-depth => Number of pattern directory traversals (default: 0 (all)) 387 --version => show version 388 --help => show this help information 389 390Default options: 391 [--email --git --m --n --l --multiline --pattern-depth=0 --remove-duplicates] 392 393Notes: 394 Using "-f directory" may give unexpected results: 395 Used with "--git", git signators for _all_ files in and below 396 directory are examined as git recurses directories. 397 Any specified X: (exclude) pattern matches are _not_ ignored. 398 Used with "--nogit", directory is used as a pattern match, 399 no individual file within the directory or subdirectory 400 is matched. 401 Used with "--git-blame", does not iterate all files in directory 402 Using "--git-blame" is slow and may add old committers and authors 403 that are no longer active maintainers to the output. 404EOT 405} 406 407sub top_of_kernel_tree { 408 my ($lk_path) = @_; 409 410 if ($lk_path ne "" && substr($lk_path,length($lk_path)-1,1) ne "/") { 411 $lk_path .= "/"; 412 } 413 if ( (-f "${lk_path}COPYING") 414 && (-f "${lk_path}CREDITS") 415 && (-f "${lk_path}Kbuild") 416 && (-f "${lk_path}MAINTAINERS") 417 && (-f "${lk_path}Makefile") 418 && (-f "${lk_path}README") 419 && (-d "${lk_path}Documentation") 420 && (-d "${lk_path}arch") 421 && (-d "${lk_path}include") 422 && (-d "${lk_path}drivers") 423 && (-d "${lk_path}fs") 424 && (-d "${lk_path}init") 425 && (-d "${lk_path}ipc") 426 && (-d "${lk_path}kernel") 427 && (-d "${lk_path}lib") 428 && (-d "${lk_path}scripts")) { 429 return 1; 430 } 431 return 0; 432} 433 434sub parse_email { 435 my ($formatted_email) = @_; 436 437 my $name = ""; 438 my $address = ""; 439 440 if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) { 441 $name = $1; 442 $address = $2; 443 } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) { 444 $address = $1; 445 } elsif ($formatted_email =~ /^(.+\@\S*).*$/) { 446 $address = $1; 447 } 448 449 $name =~ s/^\s+|\s+$//g; 450 $name =~ s/^\"|\"$//g; 451 $address =~ s/^\s+|\s+$//g; 452 453 if ($name =~ /[^a-z0-9 \.\-]/i) { ##has "must quote" chars 454 $name =~ s/(?<!\\)"/\\"/g; ##escape quotes 455 $name = "\"$name\""; 456 } 457 458 return ($name, $address); 459} 460 461sub format_email { 462 my ($name, $address) = @_; 463 464 my $formatted_email; 465 466 $name =~ s/^\s+|\s+$//g; 467 $name =~ s/^\"|\"$//g; 468 $address =~ s/^\s+|\s+$//g; 469 470 if ($name =~ /[^a-z0-9 \.\-]/i) { ##has "must quote" chars 471 $name =~ s/(?<!\\)"/\\"/g; ##escape quotes 472 $name = "\"$name\""; 473 } 474 475 if ($email_usename) { 476 if ("$name" eq "") { 477 $formatted_email = "$address"; 478 } else { 479 $formatted_email = "$name <${address}>"; 480 } 481 } else { 482 $formatted_email = $address; 483 } 484 485 return $formatted_email; 486} 487 488sub find_starting_index { 489 490 my ($index) = @_; 491 492 while ($index > 0) { 493 my $tv = $typevalue[$index]; 494 if (!($tv =~ m/^(\C):\s*(.*)/)) { 495 last; 496 } 497 $index--; 498 } 499 500 return $index; 501} 502 503sub find_ending_index { 504 my ($index) = @_; 505 506 while ($index < @typevalue) { 507 my $tv = $typevalue[$index]; 508 if (!($tv =~ m/^(\C):\s*(.*)/)) { 509 last; 510 } 511 $index++; 512 } 513 514 return $index; 515} 516 517sub add_categories { 518 my ($index) = @_; 519 520 my $i; 521 my $start = find_starting_index($index); 522 my $end = find_ending_index($index); 523 524 push(@subsystem, $typevalue[$start]); 525 526 for ($i = $start + 1; $i < $end; $i++) { 527 my $tv = $typevalue[$i]; 528 if ($tv =~ m/^(\C):\s*(.*)/) { 529 my $ptype = $1; 530 my $pvalue = $2; 531 if ($ptype eq "L") { 532 my $list_address = $pvalue; 533 my $list_additional = ""; 534 if ($list_address =~ m/([^\s]+)\s+(.*)$/) { 535 $list_address = $1; 536 $list_additional = $2; 537 } 538 if ($list_additional =~ m/subscribers-only/) { 539 if ($email_subscriber_list) { 540 push(@list_to, $list_address); 541 } 542 } else { 543 if ($email_list) { 544 push(@list_to, $list_address); 545 } 546 } 547 } elsif ($ptype eq "M") { 548 my ($name, $address) = parse_email($pvalue); 549 if ($name eq "") { 550 if ($i > 0) { 551 my $tv = $typevalue[$i - 1]; 552 if ($tv =~ m/^(\C):\s*(.*)/) { 553 if ($1 eq "P") { 554 $name = $2; 555 $pvalue = format_email($name, $address); 556 } 557 } 558 } 559 } 560 if ($email_maintainer) { 561 push_email_addresses($pvalue); 562 } 563 } elsif ($ptype eq "T") { 564 push(@scm, $pvalue); 565 } elsif ($ptype eq "W") { 566 push(@web, $pvalue); 567 } elsif ($ptype eq "S") { 568 push(@status, $pvalue); 569 } 570 } 571 } 572} 573 574my %email_hash_name; 575my %email_hash_address; 576 577sub email_inuse { 578 my ($name, $address) = @_; 579 580 return 1 if (($name eq "") && ($address eq "")); 581 return 1 if (($name ne "") && exists($email_hash_name{$name})); 582 return 1 if (($address ne "") && exists($email_hash_address{$address})); 583 584 return 0; 585} 586 587sub push_email_address { 588 my ($line) = @_; 589 590 my ($name, $address) = parse_email($line); 591 592 if ($address eq "") { 593 return 0; 594 } 595 596 if (!$email_remove_duplicates) { 597 push(@email_to, format_email($name, $address)); 598 } elsif (!email_inuse($name, $address)) { 599 push(@email_to, format_email($name, $address)); 600 $email_hash_name{$name}++; 601 $email_hash_address{$address}++; 602 } 603 604 return 1; 605} 606 607sub push_email_addresses { 608 my ($address) = @_; 609 610 my @address_list = (); 611 612 if (rfc822_valid($address)) { 613 push_email_address($address); 614 } elsif (@address_list = rfc822_validlist($address)) { 615 my $array_count = shift(@address_list); 616 while (my $entry = shift(@address_list)) { 617 push_email_address($entry); 618 } 619 } else { 620 if (!push_email_address($address)) { 621 warn("Invalid MAINTAINERS address: '" . $address . "'\n"); 622 } 623 } 624} 625 626sub which { 627 my ($bin) = @_; 628 629 foreach my $path (split(/:/, $ENV{PATH})) { 630 if (-e "$path/$bin") { 631 return "$path/$bin"; 632 } 633 } 634 635 return ""; 636} 637 638sub mailmap { 639 my @lines = @_; 640 my %hash; 641 642 foreach my $line (@lines) { 643 my ($name, $address) = parse_email($line); 644 if (!exists($hash{$name})) { 645 $hash{$name} = $address; 646 } elsif ($address ne $hash{$name}) { 647 $address = $hash{$name}; 648 $line = format_email($name, $address); 649 } 650 if (exists($mailmap{$name})) { 651 my $obj = $mailmap{$name}; 652 foreach my $map_address (@$obj) { 653 if (($map_address eq $address) && 654 ($map_address ne $hash{$name})) { 655 $line = format_email($name, $hash{$name}); 656 } 657 } 658 } 659 } 660 661 return @lines; 662} 663 664sub recent_git_signoffs { 665 my ($file) = @_; 666 667 my $sign_offs = ""; 668 my $cmd = ""; 669 my $output = ""; 670 my $count = 0; 671 my @lines = (); 672 my %hash; 673 my $total_sign_offs; 674 675 if (which("git") eq "") { 676 warn("$P: git not found. Add --nogit to options?\n"); 677 return; 678 } 679 if (!(-d ".git")) { 680 warn("$P: .git directory not found. Use a git repository for better results.\n"); 681 warn("$P: perhaps 'git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git'\n"); 682 return; 683 } 684 685 $cmd = "git log --since=${email_git_since} -- ${file}"; 686 687 $output = `${cmd}`; 688 $output =~ s/^\s*//gm; 689 690 @lines = split("\n", $output); 691 692 @lines = grep(/^[-_ a-z]+by:.*\@.*$/i, @lines); 693 if (!$email_git_penguin_chiefs) { 694 @lines = grep(!/${penguin_chiefs}/i, @lines); 695 } 696 # cut -f2- -d":" 697 s/.*:\s*(.+)\s*/$1/ for (@lines); 698 699 $total_sign_offs = @lines; 700 701 if ($email_remove_duplicates) { 702 @lines = mailmap(@lines); 703 } 704 705 @lines = sort(@lines); 706 707 # uniq -c 708 $hash{$_}++ for @lines; 709 710 # sort -rn 711 foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { 712 my $sign_offs = $hash{$line}; 713 $count++; 714 last if ($sign_offs < $email_git_min_signatures || 715 $count > $email_git_max_maintainers || 716 $sign_offs * 100 / $total_sign_offs < $email_git_min_percent); 717 push_email_address($line); 718 } 719} 720 721sub save_commits { 722 my ($cmd, @commits) = @_; 723 my $output; 724 my @lines = (); 725 726 $output = `${cmd}`; 727 728 @lines = split("\n", $output); 729 foreach my $line (@lines) { 730 if ($line =~ m/^(\w+) /) { 731 push (@commits, $1); 732 } 733 } 734 return @commits; 735} 736 737sub git_assign_blame { 738 my ($file) = @_; 739 740 my @lines = (); 741 my @commits = (); 742 my $cmd; 743 my $output; 744 my %hash; 745 my $total_sign_offs; 746 my $count; 747 748 if (@range) { 749 foreach my $file_range_diff (@range) { 750 next if (!($file_range_diff =~ m/(.+):(.+):(.+)/)); 751 my $diff_file = $1; 752 my $diff_start = $2; 753 my $diff_length = $3; 754 next if (!("$file" eq "$diff_file")); 755 $cmd = "git blame -l -L $diff_start,+$diff_length $file"; 756 @commits = save_commits($cmd, @commits); 757 } 758 } else { 759 if (-f $file) { 760 $cmd = "git blame -l $file"; 761 @commits = save_commits($cmd, @commits); 762 } 763 } 764 765 $total_sign_offs = 0; 766 @commits = uniq(@commits); 767 foreach my $commit (@commits) { 768 $cmd = "git log -1 ${commit}"; 769 770 $output = `${cmd}`; 771 $output =~ s/^\s*//gm; 772 @lines = split("\n", $output); 773 774 @lines = grep(/^[-_ a-z]+by:.*\@.*$/i, @lines); 775 if (!$email_git_penguin_chiefs) { 776 @lines = grep(!/${penguin_chiefs}/i, @lines); 777 } 778 779 # cut -f2- -d":" 780 s/.*:\s*(.+)\s*/$1/ for (@lines); 781 782 $total_sign_offs += @lines; 783 784 if ($email_remove_duplicates) { 785 @lines = mailmap(@lines); 786 } 787 788 $hash{$_}++ for @lines; 789 } 790 791 $count = 0; 792 foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { 793 my $sign_offs = $hash{$line}; 794 $count++; 795 last if ($sign_offs < $email_git_min_signatures || 796 $count > $email_git_max_maintainers || 797 $sign_offs * 100 / $total_sign_offs < $email_git_min_percent); 798 push_email_address($line); 799 } 800} 801 802sub uniq { 803 my @parms = @_; 804 805 my %saw; 806 @parms = grep(!$saw{$_}++, @parms); 807 return @parms; 808} 809 810sub sort_and_uniq { 811 my @parms = @_; 812 813 my %saw; 814 @parms = sort @parms; 815 @parms = grep(!$saw{$_}++, @parms); 816 return @parms; 817} 818 819sub output { 820 my @parms = @_; 821 822 if ($output_multiline) { 823 foreach my $line (@parms) { 824 print("${line}\n"); 825 } 826 } else { 827 print(join($output_separator, @parms)); 828 print("\n"); 829 } 830} 831 832my $rfc822re; 833 834sub make_rfc822re { 835# Basic lexical tokens are specials, domain_literal, quoted_string, atom, and 836# comment. We must allow for rfc822_lwsp (or comments) after each of these. 837# This regexp will only work on addresses which have had comments stripped 838# and replaced with rfc822_lwsp. 839 840 my $specials = '()<>@,;:\\\\".\\[\\]'; 841 my $controls = '\\000-\\037\\177'; 842 843 my $dtext = "[^\\[\\]\\r\\\\]"; 844 my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*"; 845 846 my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*"; 847 848# Use zero-width assertion to spot the limit of an atom. A simple 849# $rfc822_lwsp* causes the regexp engine to hang occasionally. 850 my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))"; 851 my $word = "(?:$atom|$quoted_string)"; 852 my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*"; 853 854 my $sub_domain = "(?:$atom|$domain_literal)"; 855 my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*"; 856 857 my $addr_spec = "$localpart\@$rfc822_lwsp*$domain"; 858 859 my $phrase = "$word*"; 860 my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)"; 861 my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*"; 862 my $mailbox = "(?:$addr_spec|$phrase$route_addr)"; 863 864 my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*"; 865 my $address = "(?:$mailbox|$group)"; 866 867 return "$rfc822_lwsp*$address"; 868} 869 870sub rfc822_strip_comments { 871 my $s = shift; 872# Recursively remove comments, and replace with a single space. The simpler 873# regexps in the Email Addressing FAQ are imperfect - they will miss escaped 874# chars in atoms, for example. 875 876 while ($s =~ s/^((?:[^"\\]|\\.)* 877 (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*) 878 \((?:[^()\\]|\\.)*\)/$1 /osx) {} 879 return $s; 880} 881 882# valid: returns true if the parameter is an RFC822 valid address 883# 884sub rfc822_valid ($) { 885 my $s = rfc822_strip_comments(shift); 886 887 if (!$rfc822re) { 888 $rfc822re = make_rfc822re(); 889 } 890 891 return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/; 892} 893 894# validlist: In scalar context, returns true if the parameter is an RFC822 895# valid list of addresses. 896# 897# In list context, returns an empty list on failure (an invalid 898# address was found); otherwise a list whose first element is the 899# number of addresses found and whose remaining elements are the 900# addresses. This is needed to disambiguate failure (invalid) 901# from success with no addresses found, because an empty string is 902# a valid list. 903 904sub rfc822_validlist ($) { 905 my $s = rfc822_strip_comments(shift); 906 907 if (!$rfc822re) { 908 $rfc822re = make_rfc822re(); 909 } 910 # * null list items are valid according to the RFC 911 # * the '1' business is to aid in distinguishing failure from no results 912 913 my @r; 914 if ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so && 915 $s =~ m/^$rfc822_char*$/) { 916 while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) { 917 push @r, $1; 918 } 919 return wantarray ? (scalar(@r), @r) : 1; 920 } 921 else { 922 return wantarray ? () : 0; 923 } 924} 925