2 # Copyright (c) 2007-2018 Olly Betts
4 # Permission is hereby granted, free of charge, to any person obtaining a copy
5 # of this software and associated documentation files (the "Software"), to
6 # deal in the Software without restriction, including without limitation the
7 # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 # sell copies of the Software, and to permit persons to whom the Software is
9 # furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice shall be included in
12 # all copies or substantial portions of the Software.
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 if (defined $ARGV[0] && $ARGV[0] eq '--help') {
30 Nit-pick Xapian patches.
32 A patch can be supplied on stdin, or one or more patch files listed on the
35 Produces output suitable for use with vim's quick-fix mode, and similar
36 features in other editors.
40 git diff master.. | xapian-check-patch > tmp.qf
50 my ($type, $msg, $fullline) = @_;
51 print "$fnm:$lineno: $type: $msg";
52 if (defined $fullline) {
63 # SVN property changes don't have an "Index: [...]" line.
65 my $check_trailing = 0;
66 my $check_space_tab = 0;
69 my $header_guard_macro;
70 my $last_first_char = '';
73 my $preproc_continuation;
75 if (/^Index: (.+)/ || m!^diff --git a/.+ b/(.+)!) {
81 $header_guard_macro = undef;
84 $preproc_continuation = 0;
87 if ($fnm =~ /\.cc$/) {
88 if ($fnm !~ m!\b(?:cdb|portability/)! &&
89 $fnm !~ m!\bcommon/getopt\.cc$! &&
90 $fnm !~ m!\bomega/md5\.cc$! &&
91 $fnm !~ m!\bcommon/msvc_dirent\.cc$!) {
93 $want_tabs = 1 unless ($fnm =~ m!\blanguages/steminternal\.cc$!);
95 } elsif ($fnm =~ /\.c$/) {
96 if ($fnm !~ m!\blanguages/compiler/! &&
97 $fnm !~ m!/lemon\.c$! &&
98 $fnm !~ m!/xapdep\.c$!) {
102 } elsif ($fnm =~ /\.h$/) {
103 if ($fnm !~ m!\binclude/xapian/intrusive_ptr\.h! &&
104 $fnm !~ m!\blanguages/compiler/! &&
105 $fnm !~ m!\bcommon/msvc_dirent\.h$! &&
106 $fnm !~ m!\bmatcher/heap\.h$!) {
108 $want_tabs = 1 unless ($fnm =~ m!/omega/cdb!);
110 } elsif ($fnm =~ /\.py(?:\.in)?$/) {
113 } elsif ($fnm =~ m!(?:^|/)ChangeLog\b!) {
119 $fnm !~ m!\bcommon/msvc_dirent\.! &&
120 $fnm !~ m!/lemon\.c$! &&
121 $fnm !~ m!/queryparser\.lt$! &&
123 $fnm !~ m!/testdata/etext\.txt$!;
125 $fnm !~ /\.patch$/ &&
127 # print STDERR "$fnm: lang=" . ($lang // "UNKNOWN") . "\n";
130 my $pre3 = substr($_, 0, 3);
131 if ($pre3 eq '@@ ') {
# Pick up the new-file starting line number from a unified diff hunk header.
# The ",count" part is omitted by diff when the count is 1 (for example
# "@@ -3 +3 @@"), so both counts are optional here; requiring them would
# leave $lineno stale for single-line hunks.
132 /^\@\@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)?\b/ and $lineno = $1;
136 if ($pre3 eq '---' || $pre3 eq '+++') {
140 if (defined $lang && ($lang eq 'c++' || $lang eq 'h' || $lang eq 'c')) {
141 # Uncomment commented out parameter names: foo(int /*bar*/) -> foo(int bar)
142 s!/\*([A-Za-z_][A-Za-z_0-9]*)\*/([,)])!$1$2!g;
144 # Check for comments without a space before the comment text.
145 if (m!^\+.*\s/[*/]{1,2}[A-Za-z0-9]!) {
146 diagnostic
('error', "Missing space between comment characters and comment text", $fullline);
150 if (s!/(?:\*.*?\*/|/.*)!!g) {
157 # Trim content of comments ending on this line:
158 if (s!^(.).*\*/!$1*/!) {
164 # Drop comment content for "*" continuation lines (when /* isn't in hunk):
165 s/^(.)(\s*\*).*/$1$2/;
167 } elsif (defined $lang && $lang eq 'py') {
174 # Replace multiple spaces before line continuation marker:
177 if (defined $lang && ($lang eq 'c++' || $lang eq 'h' || $lang eq 'c')) {
178 if (substr($_, 0, 1) eq '+') {
179 my $expandedline = '';
180 for my $i (1..length($fullline) - 1) {
181 my $ch = substr($fullline, $i, 1);
183 $expandedline .= ('.' x
(8 - length($expandedline) % 8));
185 $expandedline .= $ch;
188 chomp($expandedline);
189 if (length($expandedline) > 80 && !/^\+[ \t]*LOGCALL/) {
190 diagnostic
('error', "Line extends beyond column 80 (to column ".length($expandedline).")", $fullline);
193 if (m
,^\
+\s
+LOGCALL
(?
:_
[A
-Z0
-9]+)*\
([^"]*"[^"]*(?<!operator)\(,) {
194 diagnostic('error', "Don
't include parentheses in debug logging method/class name", $fullline);
196 # Replace string literals containing escaped quotes:
198 my $quote = substr($_, $-[0], 1);
202 QUOTELOOP: while (1) {
203 if ($i >= length($_)) {
204 $_ = substr($_, 0, $start) . "X
\n";
207 my $c = substr($_, $i, 1);
209 $_ = substr($_, 0, $start) . "X
" . substr($_, $i);
211 # See if there's another string after this one:
212 while ($i != length($_)) {
213 $c = substr($_, $i, 1);
215 if ($c eq '"' || $c eq "'") {
226 $c = substr($_, $i, 1);
228 ++$i while (substr($_, $i, 1) =~ /^[A-Fa-f0-9]$/);
230 } elsif ($c =~ /^[0-7]/) {
232 ++$i while ($i - $j <= 3 && substr($_, $i, 1) =~ /^[0-7]$/);
234 } elsif ($c eq '"' || $c eq "'") {
243 if ($check_trailing && $fullline =~ /^\+.*[ \t]$/) {
244 diagnostic('error', "added
/changed line has trailing whitespace
", $fullline);
246 if ($check_space_tab && /^\+.* \t/) {
247 diagnostic('error', "added
/changed line has space before tab
", $fullline);
249 if ($want_tabs == 1 and /^\+\t* {8}/) {
250 diagnostic('error', "added
/changed line uses spaces
for indentation rather than tab
", $fullline);
252 if (!$want_tabs and /^\+ *\t/) {
253 diagnostic('error', "added
/changed line uses tab
for indentation rather than spaces
", $fullline);
255 if ((!defined $lang || $lang ne 'changelog') && $fullline =~ /^([-+]).*\bFIX(?:ME)\b/) {
256 # Break up the string in the regexp above and messages below to avoid
257 # this triggering on its own code!
259 # Not an error, but interesting information.
260 diagnostic('info', "FIX
"."ME removed
", $fullline);
262 # Not an error, but not good.
263 diagnostic('warning', "FIX
"."ME added
", $fullline);
266 if (defined $lang && $lang ne 'changelog' && /^\+.*\\([abcefp]|brief|code|deprecated|endcode|exception|file|internal|li|param|private|return|todo)\b/) {
267 diagnostic('error', "Doxygen command
'\\$1' introduced by
'\\' not '\@'", $fullline);
269 if (defined $lang && $lang ne 'changelog' && /^\+.*@\s+([abcefp]|brief|code|deprecated|endcode|exception|file|internal|li|param|private|return|todo)\b/) {
270 diagnostic('error', "Broken Doxygen command
: whitespace between
'\@' and '$1'", $fullline);
272 if (defined $lang && ($lang eq 'c++' || $lang eq 'h' || $lang eq 'c')) {
273 if (/^([-+ ])(\s*)\#/) {
274 # Avoid misfiring for something like:
277 if (!$preproc_continuation) {
278 if ($1 eq '+' && $2 ne '') {
279 diagnostic('error', "Whitespace before
'#' on preprocessor line
", $fullline);
283 $preproc_continuation = /\\$/;
284 } elsif ($preproc_continuation) {
285 $preproc_continuation = /\\$/;
289 if ($check_space_tab && /^\+( (?:| | | ))[^ \t#].*(?:[^)];|[^);,])\n/) {
290 # Exclude lines ending ');', ')', or ',' to avoid reporting for wrapped function arguments.
291 diagnostic('error', "line indented by
".length($1)." spaces
", $fullline);
293 if (m!^\+.*\b([A-Za-z_][A-Za-z_0-9]*)\s+\(! &&
294 $1 !~ /^(bool|case|catch|double|for|if|int|return|switch|throw|void|while)$/) {
296 diagnostic('error', "Whitespace between
'$1' and '('", $fullline);
298 # FIXME: We skip preprocessor lines for now to avoid triggering
299 # on things like «#define FOUR (2+2)» but it would be good to
300 # catch «#define FOO(x) foo (x)»
303 if (m!^\+\s*(case|catch|class|do|for|if|namespace|struct|switch|try|union)\b([^ ]| \s)!) {
304 diagnostic('error', "'$1' not followed by exactly one space
", $fullline);
306 if (m!^\+.*;[^\s\\]!) {
307 diagnostic('error', "Missing space after
';'", $fullline);
309 if (m!^\+.*[^(;]\s;!) {
310 # Stuff like this is OK: for ( ; ; ) {
311 # though for that exact case I'd suggest: while (true) {
312 diagnostic('error', "Whitespace before
';'", $fullline);
314 if (m!^\+.*?\b(return)\b([^ ;]| \s)!) {
315 diagnostic('error', "'$1' not followed by exactly one space
", $fullline);
317 if (m!^\+.*?\b(else)\b([^ \n]| \s)!) {
318 diagnostic('error', "'$1' not followed by exactly one space
", $fullline);
320 if (m!^\+.*?\b(while)\b([^ ]| \s)!) {
321 diagnostic('error', "'$1' not followed by exactly one space
", $fullline);
323 if (m!^\+.*?(?:}|}\s{2,}|}\t|^[^}]*)\b(catch)\b!) {
324 diagnostic('error', "'$1' not preceded by exactly
'} '", $fullline);
326 if (m!^\+.*?(?:}|}\s{2,}|}\t)\b(else|while)\b!) {
327 diagnostic('error', "'}' and '$1' not separated by exactly one space
", $fullline);
329 if (m!^\+.*\((?: [^;]|\t)!) {
330 # Allow: for ( ; i != 10; ++i)
331 diagnostic('error', "Whitespace after
'('", $fullline);
333 if (m!^\+.*\H.*\h\)!) {
334 diagnostic('error', "Whitespace before
')'", $fullline);
336 if (m!^\+.*;\s*(\w+)([-+]{2})\)!) {
337 diagnostic('error', "Prefer
'$2$1' to
'$1$2'", $fullline);
339 if (m,^\+\s*[^#].*[\w)](?!-[->]|\+\+)((?:\&\&|\|\||<<|>>|[-+/*%~=<>!&|^])=?|[?]),) {
343 if (substr($_, $pre[1] - 8, 8) eq 'operator') {
345 } elsif ($op eq '>' && substr($_, 0, $pre[1]) =~ /[A-Za-z0-9_]</) {
346 # y = static_cast<char>(x);
347 } elsif ($op eq '>') {
348 } elsif ($op eq '<' && substr($_, $pre[1] - 1, 1) =~ /^[A-Za-z0-9_]$/ && substr($_, $post[1]) =~ />/) {
349 # y = static_cast<char>(x);
350 } elsif ($op eq '<' &&
351 substr($_, 0, $pre[1]) =~ /\b(?:enable_if|list|map|multimap|multiset|priority_queue|set|template|unordered_map|unordered_set|vector)$/) {
352 # y = priority_queue<Foo*,
354 # template<typename A,
356 } elsif ($op eq '&&' && substr($_, 0, $pre[1]) =~ /\b(?:auto|bool|double|float|int(?:\d+_t)?|long|string|uint\d+_t|unsigned|[A-Z][A-Za-z0-9_]*)$/) {
358 # method(Class&& foo);
359 } elsif (($op eq '<<' || $op eq '>>') &&
360 substr($_, 0, $pre[1]) =~ /\b(?:0x[0-9a-fA-F]+|[0-9]+)$/ &&
361 substr($_, $post[1]) =~ /^(?:0x[0-9a-fA-F]+|[0-9]+)\b/) {
363 } elsif (($op eq '-' || $op eq '+') &&
364 substr($_, 0, $pre[1]) =~ /[0-9]\.?e$/) {
366 } elsif ($op eq '>>' &&
367 /[A-Za-z0-9_]<.+</) {
368 # vector<vector<int>> v;
369 } elsif ($op =~ /^[*&|]$/) {
370 # FIXME: *: const char* x;
371 # FIXME: &: const char& x;
374 diagnostic('error', "Missing space before
'$op'", $fullline);
377 if (m@^\+\s*[^#\s].*?((?:\&\&|\|\||<<|>>|[-+/*%~=<>!&|^])=?|[?:,])(?<!(?:-[->]|\+\+|::))(?:[\w\(\.\{!"']| \s)@) {
381 if ($op eq '~' && substr($_, $post[1]) =~ /^[A-Za-z][A-Za-z0-9_]*\(/) {
382 # Destructor - e.g. ~Foo();
383 } elsif (($op eq '-' || $op eq '+' || $op eq '!' || $op eq '~') &&
384 substr($_, 0, $pre[1]) =~ m@(?:[-+/*%~=<>&|,;?:] |[\[(]|\b(?:return|case) |^\+\s*)$@) {
385 # Unary -, +, !, ~: e.g. foo = +1; bar = x * (-y); baz = a * -b;
386 } elsif ($op eq ',' && (
387 /\b(?:AssertRel(?:Paranoid)?|TEST_REL)\(/ ||
390 } elsif ($op eq '>>' &&
391 /[A-Za-z0-9_]<.+</) {
392 # vector<vector<int>>&
393 } elsif ($op =~ /^[*&<>|]$/) {
394 # FIXME: *: const char *x;
395 # FIXME: &: const char &x;
396 # FIXME: < >: y = static_cast<char>(x);
398 } elsif (substr($_, $pre[1] - 8, 8) eq 'operator
') {
400 } elsif (($op eq '<<' || $op eq '>>') &&
401 substr($_, 0, $pre[1]) =~ /\b(?:0x[0-9a-fA-F]+|[0-9]+)$/ &&
402 substr($_, $post[1]) =~ /^(?:0x[0-9a-fA-F]+|[0-9]+)\b/) {
404 } elsif (($op eq '-' || $op eq '+') &&
405 substr($_, 0, $pre[1]) =~ /[0-9]\.?e$/) {
408 diagnostic('error
', "Should have exactly one space after '$op'", $fullline);
412 diagnostic('error
', "Extra ';' at end of line", $fullline);
414 if (m@^\+[^#]*?[^#\h] +(,|->)@) {
415 diagnostic('error
', "Space before '$1'", $fullline);
417 if (m,^\+[^#]*?[^#\h] ,) {
418 diagnostic('error
', "Multiple spaces", $fullline);
420 if (m!^\+(?:.*[;{])?\s*/[/*]{1,2}\w!) {
421 diagnostic('error
', "added/changed line has comment without whitespace before the text", $fullline);
424 diagnostic('error
', "No space between ')' and '{'", $fullline);
426 if ($fnm !~ m!/(?:md5|posixy_wrapper|perftest)\.cc$! &&
427 m,^\+.*[^\w\.>]([a-z][a-z0-9]*[A-Z]\w*),) {
429 if ($symbol eq 'gzFile
' || $symbol eq 'uInt
' || $symbol =~ /^(?:de|in)flate[A-Z]/) {
430 # Whitelist symbols from APIs we use.
431 } elsif ($symbol =~ /^[gs]et[A-Z]$/) {
432 # For now, allow setD(), etc.
433 } elsif ($symbol =~ /^h(?:File|Read|Write|Pipe|Client)$/ || $symbol eq 'fdwCtrlType
' || $symbol eq 'pShutdownSocket
') {
434 # Platform specific names, allow for now.
436 diagnostic('error
', "camelCase identifier '$1' - Xapian coding convention is to use lower case and underscores for variables and functions, and CamelCase for class names", $fullline);
439 if ($lineno == 1 && m!^\+!) {
440 if (m!^/\*\* \@file (\S+)!) {
442 if (length $fnm == length $at_file ||
443 (length $fnm > length $at_file && substr($fnm, -length $at_file - 1, 1) eq '/') &&
444 substr($fnm, -length $at_file) eq $at_file) {
447 diagnostic('error
', "\@file doesn't match filename
", $fullline);
450 diagnostic('error', "\
@file missing
", $fullline);
453 if (/^\+.*\b(?:class|struct)\b.*:\s*$/) {
454 diagnostic('error', "Inheritance list
split after
':', should be before
", $fullline);
456 # Try to distinguish ternary operator (?:) correctly split after ":" vs
457 # constructor initialiser list incorrectly split after ":".
458 my $last_in_ternary = $in_ternary;
459 $in_ternary = / \?(?: |$)/;
460 if (!$last_in_ternary && !$in_ternary && /^\+.*\)\s*:\s*$/) {
461 diagnostic('error', "Constructor initialiser list
split after
':', should be before
", $fullline);
463 if (m,^\+\s+([-+/%^]|[&|]{2})\s,) {
464 diagnostic('error', "Expression
split before operator
'$1', should be after
", $fullline);
467 if (m!^\+\s*#\s*(ifndef|define|endif\s*/[*/])\s+((?:[A-Z]+_INCLUDED)?_?\w+_[Hh]\b)!) {
468 my ($type, $guard) = ($1, $2);
470 if (!defined $header_guard_macro) {
471 if ($type eq 'ifndef') {
472 $header_guard_macro = [$type, $guard];
473 my $expected_guard = uc $fnm;
474 $expected_guard =~ s![-.]!_!g;
476 if (length($expected_guard) > length($guard) &&
477 substr($expected_guard, -length($guard) - 1, 1) eq '/' &&
478 substr($expected_guard, -length($guard)) eq $guard) {
481 for my $i (1 .. length($guard)) {
482 my $ch_e = substr($expected_guard, -$i, 1);
483 my $ch_g = substr($guard, -$i, 1);
484 next if ($ch_e eq $ch_g);
485 last if ($ch_e ne '/' || $ch_g ne '_');
490 diagnostic('error', "include guard macro should match filename
", $fullline);
492 my $prefix = 'XAPIAN_INCLUDED_';
493 if ($fnm =~ m!.*omega/(?:.*/)?!) {
494 $prefix = 'OMEGA_INCLUDED_';
496 #} elsif ($fnm =~ s!.*xapian-core/.*/!!) {
497 # $expected_guard = "XAPIAN_INCLUDED_" . $expected_guard;
498 #} elsif ($fnm =~ s!.*xapian-letor/.*/!!) {
499 #$expected_guard = "XAPIAN_INCLUDED_" . $expected_guard;
500 if (defined $cut && $cut == -1) {
501 diagnostic('error', "include guard macro should
use prefix
'$prefix'", $fullline);
502 } elsif (defined $cut && substr($guard, 0, length($guard) - $cut + 1) ne $prefix) {
503 diagnostic('error', "include guard macro should
use prefix
'$prefix'", $fullline);
504 } elsif ($guard !~ /^\Q$prefix\E/) {
505 diagnostic('error', "include guard macro should
use prefix
'$prefix'", $fullline);
509 if (!($type eq 'define' && $header_guard_macro->[0] ne 'ifndef')) {
510 my $expected_guard = $header_guard_macro->[1];
511 $header_guard_macro->[0] = $type;
512 if ($guard ne $expected_guard) {
513 diagnostic('error', "include guard macro should be
$expected_guard", $fullline);
# Detect an added "#define <PREFIX>_INCLUDED_..." line (an include guard style
# macro).  The "+" after [A-Z] has to be an unescaped quantifier: written as
# "\+" it only matches a literal plus sign, so this check could never fire.
519 if (m!^\+\s*#\s*define\s+[A-Z]+_INCLUDED_!) {
520 diagnostic('error', "include guard macro
defined in non
-header
", $fullline);
523 } elsif (defined $lang && $lang eq 'py') {
525 diagnostic('error', "';' at end of line of python code
", $fullline);
528 if ($fnm !~ m!xapian-check-patch|ChangeLog|NEWS|stemming/.*/(?:voc|output)\.txt$!) {
529 if (/^\+.*?\b(xapain|the the|initialsing|ipv5|outputing)\b/i ||
530 # Cases which just need to be the prefix of a word
531 /^\+.*?\b((?:deafult|parm|peform|acessor|comptib|seach|seperat|seprat|separater|iteratat|calulat|delimitor|charactor)[a-z]*\b)/i ||
532 # Case-sensitive cases
533 /^\+.*?\b(and and)\b/) {
534 diagnostic('error', "Typo
'$1'", $fullline);
538 my $first_char = substr($fullline, 0, 1);
539 if ($first_char eq ' ') {
541 } elsif ($first_char eq '+') {
544 } elsif ($first_char eq '-') {
546 } elsif ($first_char eq '\\') {
547 # "\ No newline at end of file" - if preceded by a "+" line, this means
548 # that the patch leaves the file missing a newline at the end.
549 if ($last_first_char eq '+') {
550 diagnostic('error', 'No newline at end of file');
553 $last_first_char = $first_char;
555 if (scalar keys %count) {
556 for (sort keys %count) {
557 print STDERR "$_ count
:\t$count{$_}\n";
561 print STDERR <<"__END__
";
562 Files patched:\t$files
563 Lines added:\t$add_lines
564 Lines removed:\t$del_lines
566 exit(exists $count{'error'} ? 1 : 0);