From 66b47b4a9dad00e45c049d79966de9a3a1f4d337 Mon Sep 17 00:00:00 2001
From: Kees Cook
Date: Mon, 13 Oct 2014 15:51:57 -0700
Subject: checkpatch: look for common misspellings

Check for misspellings, based on Debian's lintian list. Several false
positives were removed, and several additional words added that were
common in the kernel:

	backword
	backwords
	invalide
	valide
	recieves
	singed
	unsinged

While going back and fixing existing spelling mistakes isn't a high
priority, it'd be nice to try to catch them before they hit the tree.

In the 13830 commits between 3.15 and 3.16, the script would have
noticed 560 spelling mistakes. The top 25 are shown here:

$ git log --pretty=oneline v3.15..v3.16 | wc -l
13830
$ git log --format='%H' v3.15..v3.16 | \
   while read commit ; do \
     echo "commit $commit" ; \
     git log --format=email --stat -p -1 $commit | \
       ./scripts/checkpatch.pl --types=typo_spelling --no-summary - ; \
   done | tee spell_v3.15..v3.16.txt | grep "may be misspelled" | \
   awk '{print $2}' | tr A-Z a-z | sort | uniq -c | sort -rn
     21 'seperate'
     17 'endianess'
     15 'sucess'
     13 'noticable'
     11 'occured'
     11 'accomodate'
     10 'interrup'
      9 'prefered'
      8 'unecessary'
      8 'explicitely'
      7 'supress'
      7 'overriden'
      7 'immediatly'
      7 'funtion'
      7 'defult'
      7 'childs'
      6 'succesful'
      6 'splitted'
      6 'specifc'
      6 'reseting'
      6 'recieve'
      6 'changable'
      5 'tmis'
      5 'singed'
      5 'preceeding'

Thanks to Joe Perches for rewrites, suggestions, additional misspelling
entries, and testing.
Signed-off-by: Kees Cook Acked-by: Joe Perches Cc: Masanari Iida Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'scripts/checkpatch.pl') diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 7a360a8c1b91..74bba23a8df0 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -9,7 +9,8 @@ use strict; use POSIX; my $P = $0; -$P =~ s@.*/@@g; +$P =~ s@(.*)/@@g; +my $D = $1; my $V = '0.32'; @@ -44,6 +45,7 @@ my $max_line_length = 80; my $ignore_perl_version = 0; my $minimum_perl_version = 5.10.0; my $min_conf_desc_length = 4; +my $spelling_file = "$D/spelling.txt"; sub help { my ($exitcode) = @_; @@ -434,6 +436,29 @@ our $allowed_asm_includes = qr{(?x: )}; # memory.h: ARM has a custom one +# Load common spelling mistakes and build regular expression list. +my $misspellings; +my @spelling_list; +my %spelling_fix; +open(my $spelling, '<', $spelling_file) + or die "$P: Can't open $spelling_file for reading: $!\n"; +while (<$spelling>) { + my $line = $_; + + $line =~ s/\s*\n?$//g; + $line =~ s/^\s*//g; + + next if ($line =~ m/^\s*#/); + next if ($line =~ m/^\s*$/); + + my ($suspect, $fix) = split(/\|\|/, $line); + + push(@spelling_list, $suspect); + $spelling_fix{$suspect} = $fix; +} +close($spelling); +$misspellings = join("|", @spelling_list); + sub build_types { my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)"; my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)"; @@ -2220,6 +2245,23 @@ sub process { "8-bit UTF-8 used in possible commit log\n" . 
$herecurr); } +# Check for various typo / spelling mistakes + if ($in_commit_log || $line =~ /^\+/) { + while ($rawline =~ /(?:^|[^a-z@])($misspellings)(?:$|[^a-z@])/gi) { + my $typo = $1; + my $typo_fix = $spelling_fix{lc($typo)}; + $typo_fix = ucfirst($typo_fix) if ($typo =~ /^[A-Z]/); + $typo_fix = uc($typo_fix) if ($typo =~ /^[A-Z]+$/); + my $msg_type = \&WARN; + $msg_type = \&CHK if ($file); + if (&{$msg_type}("TYPO_SPELLING", + "'$typo' may be misspelled - perhaps '$typo_fix'?\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/(^|[^A-Za-z@])($typo)($|[^A-Za-z@])/$1$typo_fix$3/; + } + } + } + # ignore non-hunk lines and lines being removed next if (!$hunk_line || $line =~ /^-/); -- cgit v1.2.3