summaryrefslogtreecommitdiff
path: root/scripts/get_maintainer.pl
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/get_maintainer.pl')
-rwxr-xr-xscripts/get_maintainer.pl421
1 files changed, 329 insertions, 92 deletions
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 278a45bd45a5..cdb44b63342e 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -13,7 +13,7 @@
use strict;
my $P = $0;
-my $V = '0.17';
+my $V = '0.20';
use Getopt::Long qw(:config no_auto_abbrev);
@@ -29,6 +29,8 @@ my $email_git_min_signatures = 1;
my $email_git_max_maintainers = 5;
my $email_git_min_percent = 5;
my $email_git_since = "1-year-ago";
+my $email_git_blame = 0;
+my $email_remove_duplicates = 1;
my $output_multiline = 1;
my $output_separator = ", ";
my $scm = 0;
@@ -36,6 +38,7 @@ my $web = 0;
my $subsystem = 0;
my $status = 0;
my $from_filename = 0;
+my $pattern_depth = 0;
my $version = 0;
my $help = 0;
@@ -68,6 +71,8 @@ if (!GetOptions(
'git-max-maintainers=i' => \$email_git_max_maintainers,
'git-min-percent=i' => \$email_git_min_percent,
'git-since=s' => \$email_git_since,
+ 'git-blame!' => \$email_git_blame,
+ 'remove-duplicates!' => \$email_remove_duplicates,
'm!' => \$email_maintainer,
'n!' => \$email_usename,
'l!' => \$email_list,
@@ -78,6 +83,7 @@ if (!GetOptions(
'status!' => \$status,
'scm!' => \$scm,
'web!' => \$web,
+ 'pattern-depth=i' => \$pattern_depth,
'f|file' => \$from_filename,
'v|version' => \$version,
'h|help' => \$help,
@@ -101,14 +107,19 @@ if ($#ARGV < 0) {
die "$P: argument missing: patchfile or -f file please\n";
}
+if ($output_separator ne ", ") {
+ $output_multiline = 0;
+}
+
my $selections = $email + $scm + $status + $subsystem + $web;
if ($selections == 0) {
usage();
die "$P: Missing required option: email, scm, status, subsystem or web\n";
}
-if ($email && ($email_maintainer + $email_list + $email_subscriber_list
- + $email_git + $email_git_penguin_chiefs) == 0) {
+if ($email &&
+ ($email_maintainer + $email_list + $email_subscriber_list +
+ $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
usage();
die "$P: Please select at least 1 email option\n";
}
@@ -147,9 +158,36 @@ while (<MAINT>) {
}
close(MAINT);
+my %mailmap;
+
+if ($email_remove_duplicates) {
+ open(MAILMAP, "<${lk_path}.mailmap") || warn "$P: Can't open .mailmap\n";
+ while (<MAILMAP>) {
+ my $line = $_;
+
+ next if ($line =~ m/^\s*#/);
+ next if ($line =~ m/^\s*$/);
+
+ my ($name, $address) = parse_email($line);
+ $line = format_email($name, $address);
+
+ next if ($line =~ m/^\s*$/);
+
+ if (exists($mailmap{$name})) {
+ my $obj = $mailmap{$name};
+ push(@$obj, $address);
+ } else {
+ my @arr = ($address);
+ $mailmap{$name} = \@arr;
+ }
+ }
+ close(MAILMAP);
+}
+
## use the filenames on the command line or find the filenames in the patchfiles
my @files = ();
+my @range = ();
foreach my $file (@ARGV) {
##if $file is a directory and it lacks a trailing slash, add one
@@ -162,13 +200,19 @@ foreach my $file (@ARGV) {
push(@files, $file);
} else {
my $file_cnt = @files;
+ my $lastfile;
open(PATCH, "<$file") or die "$P: Can't open ${file}\n";
while (<PATCH>) {
if (m/^\+\+\+\s+(\S+)/) {
my $filename = $1;
$filename =~ s@^[^/]*/@@;
$filename =~ s@\n@@;
+ $lastfile = $filename;
push(@files, $filename);
+ } elsif (m/^\@\@ -(\d+),(\d+)/) {
+ if ($email_git_blame) {
+ push(@range, "$lastfile:$1:$2");
+ }
}
}
close(PATCH);
@@ -201,6 +245,7 @@ foreach my $file (@files) {
if ($type eq 'X') {
if (file_match_pattern($file, $value)) {
$exclude = 1;
+ last;
}
}
}
@@ -208,35 +253,45 @@ foreach my $file (@files) {
if (!$exclude) {
my $tvi = 0;
+ my %hash;
foreach my $line (@typevalue) {
if ($line =~ m/^(\C):\s*(.*)/) {
my $type = $1;
my $value = $2;
if ($type eq 'F') {
if (file_match_pattern($file, $value)) {
- add_categories($tvi);
+ my $value_pd = ($value =~ tr@/@@);
+ my $file_pd = ($file =~ tr@/@@);
+ $value_pd++ if (substr($value,-1,1) ne "/");
+ if ($pattern_depth == 0 ||
+ (($file_pd - $value_pd) < $pattern_depth)) {
+ $hash{$tvi} = $value_pd;
+ }
}
}
}
$tvi++;
}
+ foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
+ add_categories($line);
+ }
}
if ($email && $email_git) {
recent_git_signoffs($file);
}
+ if ($email && $email_git_blame) {
+ git_assign_blame($file);
+ }
}
if ($email) {
foreach my $chief (@penguin_chief) {
if ($chief =~ m/^(.*):(.*)/) {
my $email_address;
- if ($email_usename) {
- $email_address = format_email($1, $2);
- } else {
- $email_address = $2;
- }
+
+ $email_address = format_email($1, $2);
if ($email_git_penguin_chiefs) {
push(@email_to, $email_address);
} else {
@@ -258,22 +313,22 @@ if ($email || $email_list) {
}
if ($scm) {
- @scm = sort_and_uniq(@scm);
+ @scm = uniq(@scm);
output(@scm);
}
if ($status) {
- @status = sort_and_uniq(@status);
+ @status = uniq(@status);
output(@status);
}
if ($subsystem) {
- @subsystem = sort_and_uniq(@subsystem);
+ @subsystem = uniq(@subsystem);
output(@subsystem);
}
if ($web) {
- @web = sort_and_uniq(@web);
+ @web = uniq(@web);
output(@web);
}
@@ -311,10 +366,12 @@ MAINTAINER field selection options:
--git-max-maintainers => maximum maintainers to add (default: 5)
--git-min-percent => minimum percentage of commits required (default: 5)
--git-since => git history to use (default: 1-year-ago)
+ --git-blame => use git blame to find modified commits for patch or file
--m => include maintainer(s) if any
--n => include name 'Full Name <addr\@domain.tld>'
--l => include list(s) if any
--s => include subscriber only list(s) if any
+ --remove-duplicates => minimize duplicate email names/addresses
--scm => print SCM tree(s) if any
--status => print status if any
--subsystem => print subsystem name if any
@@ -322,24 +379,28 @@ MAINTAINER field selection options:
Output type options:
--separator [, ] => separator for multiple entries on 1 line
+ using --separator also sets --nomultiline if --separator is not [, ]
--multiline => print 1 entry per line
-Default options:
- [--email --git --m --n --l --multiline]
-
Other options:
+ --pattern-depth => Number of pattern directory traversals (default: 0 (all))
--version => show version
--help => show this help information
+Default options:
+ [--email --git --m --n --l --multiline --pattern-depth=0 --remove-duplicates]
+
Notes:
Using "-f directory" may give unexpected results:
-
- Used with "--git", git signators for _all_ files in and below
- directory are examined as git recurses directories.
- Any specified X: (exclude) pattern matches are _not_ ignored.
- Used with "--nogit", directory is used as a pattern match,
- no individual file within the directory or subdirectory
- is matched.
+ Used with "--git", git signators for _all_ files in and below
+ directory are examined as git recurses directories.
+ Any specified X: (exclude) pattern matches are _not_ ignored.
+ Used with "--nogit", directory is used as a pattern match,
+ no individual file within the directory or subdirectory
+ is matched.
+ Used with "--git-blame", does not iterate all files in directory
+ Using "--git-blame" is slow and may add old committers and authors
+ that are no longer active maintainers to the output.
EOT
}
@@ -370,30 +431,100 @@ sub top_of_kernel_tree {
return 0;
}
-sub format_email {
- my ($name, $email) = @_;
+sub parse_email {
+ my ($formatted_email) = @_;
+
+ my $name = "";
+ my $address = "";
+
+ if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
+ $name = $1;
+ $address = $2;
+ } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
+ $address = $1;
+ } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
+ $address = $1;
+ }
$name =~ s/^\s+|\s+$//g;
$name =~ s/^\"|\"$//g;
- $email =~ s/^\s+|\s+$//g;
+ $address =~ s/^\s+|\s+$//g;
+
+ if ($name =~ /[^a-z0-9 \.\-]/i) { ##has "must quote" chars
+ $name =~ s/(?<!\\)"/\\"/g; ##escape quotes
+ $name = "\"$name\"";
+ }
+
+ return ($name, $address);
+}
+
+sub format_email {
+ my ($name, $address) = @_;
- my $formatted_email = "";
+ my $formatted_email;
+
+ $name =~ s/^\s+|\s+$//g;
+ $name =~ s/^\"|\"$//g;
+ $address =~ s/^\s+|\s+$//g;
if ($name =~ /[^a-z0-9 \.\-]/i) { ##has "must quote" chars
$name =~ s/(?<!\\)"/\\"/g; ##escape quotes
- $formatted_email = "\"${name}\"\ \<${email}\>";
+ $name = "\"$name\"";
+ }
+
+ if ($email_usename) {
+ if ("$name" eq "") {
+ $formatted_email = "$address";
+ } else {
+ $formatted_email = "$name <${address}>";
+ }
} else {
- $formatted_email = "${name} \<${email}\>";
+ $formatted_email = $address;
}
+
return $formatted_email;
}
-sub add_categories {
+sub find_starting_index {
+
my ($index) = @_;
- $index = $index - 1;
- while ($index >= 0) {
+ while ($index > 0) {
my $tv = $typevalue[$index];
+ if (!($tv =~ m/^(\C):\s*(.*)/)) {
+ last;
+ }
+ $index--;
+ }
+
+ return $index;
+}
+
+sub find_ending_index {
+ my ($index) = @_;
+
+ while ($index < @typevalue) {
+ my $tv = $typevalue[$index];
+ if (!($tv =~ m/^(\C):\s*(.*)/)) {
+ last;
+ }
+ $index++;
+ }
+
+ return $index;
+}
+
+sub add_categories {
+ my ($index) = @_;
+
+ my $i;
+ my $start = find_starting_index($index);
+ my $end = find_ending_index($index);
+
+ push(@subsystem, $typevalue[$start]);
+
+ for ($i = $start + 1; $i < $end; $i++) {
+ my $tv = $typevalue[$i];
if ($tv =~ m/^(\C):\s*(.*)/) {
my $ptype = $1;
my $pvalue = $2;
@@ -414,19 +545,19 @@ sub add_categories {
}
}
} elsif ($ptype eq "M") {
- my $p_used = 0;
- if ($index >= 0) {
- my $tv = $typevalue[$index - 1];
- if ($tv =~ m/^(\C):\s*(.*)/) {
- if ($1 eq "P") {
- if ($email_usename) {
- push_email_address(format_email($2, $pvalue));
- $p_used = 1;
+ my ($name, $address) = parse_email($pvalue);
+ if ($name eq "") {
+ if ($i > 0) {
+ my $tv = $typevalue[$i - 1];
+ if ($tv =~ m/^(\C):\s*(.*)/) {
+ if ($1 eq "P") {
+ $name = $2;
+ $pvalue = format_email($name, $address);
}
}
}
}
- if (!$p_used) {
+ if ($email_maintainer) {
push_email_addresses($pvalue);
}
} elsif ($ptype eq "T") {
@@ -436,31 +567,41 @@ sub add_categories {
} elsif ($ptype eq "S") {
push(@status, $pvalue);
}
-
- $index--;
- } else {
- push(@subsystem,$tv);
- $index = -1;
}
}
}
+my %email_hash_name;
+my %email_hash_address;
+
+sub email_inuse {
+ my ($name, $address) = @_;
+
+ return 1 if (($name eq "") && ($address eq ""));
+ return 1 if (($name ne "") && exists($email_hash_name{$name}));
+ return 1 if (($address ne "") && exists($email_hash_address{$address}));
+
+ return 0;
+}
+
sub push_email_address {
- my ($email_address) = @_;
+ my ($line) = @_;
+
+ my ($name, $address) = parse_email($line);
- my $email_name = "";
- if ($email_address =~ m/([^<]+)<(.*\@.*)>$/) {
- $email_name = $1;
- $email_address = $2;
+ if ($address eq "") {
+ return 0;
}
- if ($email_maintainer) {
- if ($email_usename && $email_name) {
- push(@email_to, format_email($email_name, $email_address));
- } else {
- push(@email_to, $email_address);
- }
+ if (!$email_remove_duplicates) {
+ push(@email_to, format_email($name, $address));
+ } elsif (!email_inuse($name, $address)) {
+ push(@email_to, format_email($name, $address));
+ $email_hash_name{$name}++;
+ $email_hash_address{$address}++;
}
+
+ return 1;
}
sub push_email_addresses {
@@ -476,7 +617,9 @@ sub push_email_addresses {
push_email_address($entry);
}
} else {
- warn("Invalid MAINTAINERS address: '" . $address . "'\n");
+ if (!push_email_address($address)) {
+ warn("Invalid MAINTAINERS address: '" . $address . "'\n");
+ }
}
}
@@ -492,6 +635,32 @@ sub which {
return "";
}
+sub mailmap {
+ my @lines = @_;
+ my %hash;
+
+ foreach my $line (@lines) {
+ my ($name, $address) = parse_email($line);
+ if (!exists($hash{$name})) {
+ $hash{$name} = $address;
+ } elsif ($address ne $hash{$name}) {
+ $address = $hash{$name};
+ $line = format_email($name, $address);
+ }
+ if (exists($mailmap{$name})) {
+ my $obj = $mailmap{$name};
+ foreach my $map_address (@$obj) {
+ if (($map_address eq $address) &&
+ ($map_address ne $hash{$name})) {
+ $line = format_email($name, $hash{$name});
+ }
+ }
+ }
+ }
+
+ return @lines;
+}
+
sub recent_git_signoffs {
my ($file) = @_;
@@ -500,6 +669,7 @@ sub recent_git_signoffs {
my $output = "";
my $count = 0;
my @lines = ();
+ my %hash;
my $total_sign_offs;
if (which("git") eq "") {
@@ -513,52 +683,119 @@ sub recent_git_signoffs {
}
$cmd = "git log --since=${email_git_since} -- ${file}";
- $cmd .= " | grep -Ei \"^[-_ a-z]+by:.*\\\@.*\$\"";
- if (!$email_git_penguin_chiefs) {
- $cmd .= " | grep -Ev \"${penguin_chiefs}\"";
- }
- $cmd .= " | cut -f2- -d\":\"";
- $cmd .= " | sort | uniq -c | sort -rn";
$output = `${cmd}`;
$output =~ s/^\s*//gm;
@lines = split("\n", $output);
- $total_sign_offs = 0;
+ @lines = grep(/^[-_ a-z]+by:.*\@.*$/i, @lines);
+ if (!$email_git_penguin_chiefs) {
+ @lines = grep(!/${penguin_chiefs}/i, @lines);
+ }
+ # cut -f2- -d":"
+ s/.*:\s*(.+)\s*/$1/ for (@lines);
+
+ $total_sign_offs = @lines;
+
+ if ($email_remove_duplicates) {
+ @lines = mailmap(@lines);
+ }
+
+ @lines = sort(@lines);
+
+ # uniq -c
+ $hash{$_}++ for @lines;
+
+ # sort -rn
+ foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
+ my $sign_offs = $hash{$line};
+ $count++;
+ last if ($sign_offs < $email_git_min_signatures ||
+ $count > $email_git_max_maintainers ||
+ $sign_offs * 100 / $total_sign_offs < $email_git_min_percent);
+ push_email_address($line);
+ }
+}
+
+sub save_commits {
+ my ($cmd, @commits) = @_;
+ my $output;
+ my @lines = ();
+
+ $output = `${cmd}`;
+
+ @lines = split("\n", $output);
foreach my $line (@lines) {
- if ($line =~ m/([0-9]+)\s+(.*)/) {
- $total_sign_offs += $1;
- } else {
- die("$P: Unexpected git output: ${line}\n");
+ if ($line =~ m/^(\w+) /) {
+ push (@commits, $1);
}
}
+ return @commits;
+}
- foreach my $line (@lines) {
- if ($line =~ m/([0-9]+)\s+(.*)/) {
- my $sign_offs = $1;
- $line = $2;
- $count++;
- if ($sign_offs < $email_git_min_signatures ||
- $count > $email_git_max_maintainers ||
- $sign_offs * 100 / $total_sign_offs < $email_git_min_percent) {
- last;
- }
+sub git_assign_blame {
+ my ($file) = @_;
+
+ my @lines = ();
+ my @commits = ();
+ my $cmd;
+ my $output;
+ my %hash;
+ my $total_sign_offs;
+ my $count;
+
+ if (@range) {
+ foreach my $file_range_diff (@range) {
+ next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
+ my $diff_file = $1;
+ my $diff_start = $2;
+ my $diff_length = $3;
+ next if (!("$file" eq "$diff_file"));
+ $cmd = "git blame -l -L $diff_start,+$diff_length $file";
+ @commits = save_commits($cmd, @commits);
}
- if ($line =~ m/(.+)<(.+)>/) {
- my $git_name = $1;
- my $git_addr = $2;
- if ($email_usename) {
- push(@email_to, format_email($git_name, $git_addr));
- } else {
- push(@email_to, $git_addr);
- }
- } elsif ($line =~ m/<(.+)>/) {
- my $git_addr = $1;
- push(@email_to, $git_addr);
- } else {
- push(@email_to, $line);
+ } else {
+ if (-f $file) {
+ $cmd = "git blame -l $file";
+ @commits = save_commits($cmd, @commits);
+ }
+ }
+
+ $total_sign_offs = 0;
+ @commits = uniq(@commits);
+ foreach my $commit (@commits) {
+ $cmd = "git log -1 ${commit}";
+
+ $output = `${cmd}`;
+ $output =~ s/^\s*//gm;
+ @lines = split("\n", $output);
+
+ @lines = grep(/^[-_ a-z]+by:.*\@.*$/i, @lines);
+ if (!$email_git_penguin_chiefs) {
+ @lines = grep(!/${penguin_chiefs}/i, @lines);
+ }
+
+ # cut -f2- -d":"
+ s/.*:\s*(.+)\s*/$1/ for (@lines);
+
+ $total_sign_offs += @lines;
+
+ if ($email_remove_duplicates) {
+ @lines = mailmap(@lines);
}
+
+ $hash{$_}++ for @lines;
+ }
+
+ $count = 0;
+ foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
+ my $sign_offs = $hash{$line};
+ $count++;
+ last if ($sign_offs < $email_git_min_signatures ||
+ $count > $email_git_max_maintainers ||
+ $sign_offs * 100 / $total_sign_offs < $email_git_min_percent);
+ push_email_address($line);
}
}