Skip to content

Commit

Permalink
scripts/get_maintainer.pl: add --file-emails, find embedded email add…
Browse files Browse the repository at this point in the history
…resses

Add an imperfect option to search a source file for email addresses.

New option:  --file-emails or --fe

Email addresses in files are freeform text and are nearly impossible to
parse.  Still, might as well try to do a somewhat acceptable job of
finding them.  This code should find all addresses that are in the form
addr@domain.tld

The code assumes that up to 3 alphabetic words along with dashes, commas,
and periods that precede the email address are a name.

If 3 words are found for the name, and one of the first two words is a
single letter and period, or just a single letter, then the 3 words are
used as the name; otherwise the last 2 words are used.

Some variants that are shown correctly:
    John Smith <[email protected]>
    Random J. Developer <[email protected]>
    Random J. Developer ([email protected])
    J. Random Developer [email protected]

Variants that are shown nominally correctly:
    Written by First Last ([email protected])
is shown as:
    First Last <[email protected]>

Variants that are shown incorrectly:
    Some Really Long Name <[email protected]>
    MontaVista Software, Inc. <[email protected]>
are returned as:
    Long Name <[email protected]>
    "Software, Inc" <[email protected]>

--roles and --rolestats show "(in file)" for matches.

For instance:

Without -file-emails:

$ ./scripts/get_maintainer.pl -f -nogit -roles net/core/netpoll.c
David S. Miller <[email protected]> (maintainer:NETWORKING [GENERAL])
[email protected] (open list)

With -fe:

$ ./scripts/get_maintainer.pl -f -fe -nogit -roles net/core/netpoll.c
David S. Miller <[email protected]> (maintainer:NETWORKING [GENERAL])
Matt Mackall <[email protected]> (in file)
Ingo Molnar <[email protected]> (in file)
[email protected] (open list)
[email protected] (open list:NETWORKING [GENERAL])

The number of email addresses in the file is not limited.  Neither is the
number of returned email addresses.

Signed-off-by: Joe Perches <[email protected]>
Cc: Matt Mackall <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
JoePerches authored and torvalds committed Mar 6, 2010
1 parent cea8388 commit 03372db
Showing 1 changed file with 76 additions and 7 deletions.
83 changes: 76 additions & 7 deletions scripts/get_maintainer.pl
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
my $subsystem = 0;
my $status = 0;
my $keywords = 1;
my $file_emails = 0;
my $from_filename = 0;
my $pattern_depth = 0;
my $version = 0;
Expand Down Expand Up @@ -120,6 +121,7 @@
'web!' => \$web,
'pattern-depth=i' => \$pattern_depth,
'k|keywords!' => \$keywords,
'fe|file-emails!' => \$file_emails,
'f|file' => \$from_filename,
'v|version' => \$version,
'h|help' => \$help,
Expand Down Expand Up @@ -232,6 +234,7 @@
my @files = ();
my @range = ();
my @keyword_tvi = ();
my @file_emails = ();

foreach my $file (@ARGV) {
##if $file is a directory and it lacks a trailing slash, add one
Expand All @@ -242,15 +245,21 @@
}
if ($from_filename) {
push(@files, $file);
if (-f $file && $keywords) {
if (-f $file && ($keywords || $file_emails)) {
open(FILE, "<$file") or die "$P: Can't open ${file}\n";
my $text = do { local($/) ; <FILE> };
foreach my $line (keys %keyword_hash) {
if ($text =~ m/$keyword_hash{$line}/x) {
push(@keyword_tvi, $line);
close(FILE);
if ($keywords) {
foreach my $line (keys %keyword_hash) {
if ($text =~ m/$keyword_hash{$line}/x) {
push(@keyword_tvi, $line);
}
}
}
close(FILE);
if ($file_emails) {
my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
push(@file_emails, clean_file_emails(@poss_addr));
}
}
} else {
my $file_cnt = @files;
Expand Down Expand Up @@ -285,6 +294,8 @@
}
}

@file_emails = uniq(@file_emails);

my @email_to = ();
my @list_to = ();
my @scm = ();
Expand Down Expand Up @@ -377,6 +388,14 @@
}
}
}

foreach my $email (@file_emails) {
my ($name, $address) = parse_email($email);

my $tmp_email = format_email($name, $address, $email_usename);
push_email_address($tmp_email, '');
add_role($tmp_email, 'in file');
}
}

if ($email || $email_list) {
Expand Down Expand Up @@ -453,6 +472,7 @@ sub usage {
--remove-duplicates => minimize duplicate email names/addresses
--roles => show roles (status:subsystem, git-signer, list, etc...)
--rolestats => show roles and statistics (commits/total_commits, %)
--file-emails => add email addresses found in -f file (default: 0 (off))
--scm => print SCM tree(s) if any
--status => print status if any
--subsystem => print subsystem name if any
Expand Down Expand Up @@ -811,15 +831,19 @@ sub add_role {
foreach my $entry (@email_to) {
if ($email_remove_duplicates) {
my ($entry_name, $entry_address) = parse_email($entry->[0]);
if ($name eq $entry_name || $address eq $entry_address) {
if (($name eq $entry_name || $address eq $entry_address)
&& ($role eq "" || !($entry->[1] =~ m/$role/))
) {
if ($entry->[1] eq "") {
$entry->[1] = "$role";
} else {
$entry->[1] = "$entry->[1],$role";
}
}
} else {
if ($email eq $entry->[0]) {
if ($email eq $entry->[0]
&& ($role eq "" || !($entry->[1] =~ m/$role/))
) {
if ($entry->[1] eq "") {
$entry->[1] = "$role";
} else {
Expand Down Expand Up @@ -1099,6 +1123,51 @@ sub sort_and_uniq {
return @parms;
}

# Normalize "free text + e-mail address" strings into formatted addresses.
#
# Arguments: a list of strings, each holding optional leading text followed
#            by an e-mail address that may be wrapped in (), <> or {}.
# Returns:   a list with one format_email() result per input string, in
#            input order.
#
# For every input string the routine:
#   - rewrites the address into <address> form before calling parse_email(),
#   - drops a name that consists solely of a quoted comma or period,
#   - when the name splits into more than two words, keeps the last three
#     words if one of the first two of them looks like an initial,
#     otherwise keeps only the last two,
#   - strips one trailing and one leading comma/period from the name
#     (inside the closing/opening double quote when the name is quoted),
#   - formats the result with format_email() and $email_usename.
sub clean_file_emails {
    my (@file_emails) = @_;
    my @fmt_emails = ();

    foreach my $email (@file_emails) {
        # Force the address into angle brackets, replacing any (), <> or {}
        # wrapper that surrounded it in the input text.
        $email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
        my ($name, $address) = parse_email($email);

        # A name that is nothing but a quoted "," or "." is leftover
        # punctuation, not a name.  This has to be a pattern match: an
        # 'eq' test against the pattern text only ever matched the
        # literal string "[,\.]" and so never cleared such names.
        if ($name =~ m/\A"[,\.]"\z/) {
            $name = "";
        }

        # Split on anything that is not a letter or one of ' , . + -
        # (so whitespace and double quotes act as separators).
        my @nw = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
        if (@nw > 2) {
            my $first = $nw[@nw - 3];
            my $middle = $nw[@nw - 2];
            my $last = $nw[@nw - 1];

            # Keep three words only when the first or middle word looks
            # like an initial: a single character ("J") or a character
            # followed by a period ("J.").
            if (((length($first) == 1 && $first =~ m/[A-Za-z]/) ||
                 (length($first) == 2 && substr($first, -1) eq ".")) ||
                (length($middle) == 1 ||
                 (length($middle) == 2 && substr($middle, -1) eq "."))) {
                $name = "$first $middle $last";
            } else {
                $name = "$middle $last";
            }
        }

        # Remove one trailing comma/period, keeping a closing quote if any.
        if (substr($name, -1) =~ /[,\.]/) {
            $name = substr($name, 0, length($name) - 1);
        } elsif (substr($name, -2) =~ /[,\.]"/) {
            $name = substr($name, 0, length($name) - 2) . '"';
        }

        # Remove one leading comma/period, keeping an opening quote if any.
        if (substr($name, 0, 1) =~ /[,\.]/) {
            $name = substr($name, 1, length($name) - 1);
        } elsif (substr($name, 0, 2) =~ /"[,\.]/) {
            $name = '"' . substr($name, 2, length($name) - 2);
        }

        my $fmt_email = format_email($name, $address, $email_usename);
        push(@fmt_emails, $fmt_email);
    }
    return @fmt_emails;
}

sub merge_email {
my @lines;
my %saw;
Expand Down

0 comments on commit 03372db

Please sign in to comment.