2 # Copyright (c) 2007-2017 Olly Betts
4 # Permission is hereby granted, free of charge, to any person obtaining a copy
5 # of this software and associated documentation files (the "Software"), to
6 # deal in the Software without restriction, including without limitation the
7 # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 # sell copies of the Software, and to permit persons to whom the Software is
9 # furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice shall be included in
12 # all copies or substantial portions of the Software.
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 if (defined $ARGV[0] && $ARGV[0] eq '--help') {
30 Nit-pick Xapian patches.
32 A patch can be supplied on stdin, or one or more patch files listed on the
35 Produces output suitable for use with vim's quick-fix mode, and similar
36 features in other editors.
40 git diff master.. | xapian-check-patch > tmp.qf
50 my ($type, $msg, $fullline) = @_;
51 print "$fnm:$lineno: $type: $msg";
52 if (defined $fullline) {
63 # SVN property changes don't have an "Index: [...]" line.
65 my $check_trailing = 0;
66 my $check_space_tab = 0;
69 my $header_guard_macro;
70 my $last_first_char = '';
73 if (/^Index: (.+)/ || m!^diff --git a/.+ b/(.+)!) {
79 $header_guard_macro = undef;
83 if ($fnm =~ /\.cc$/) {
84 if ($fnm !~ m!\b(?:cdb|portability/)! &&
85 $fnm !~ m!\bcommon/getopt\.cc$! &&
86 $fnm !~ m!\bomega/md5\.cc$! &&
87 $fnm !~ m!\bcommon/msvc_dirent\.cc$!) {
89 $want_tabs = 1 unless ($fnm =~ m!\blanguages/steminternal\.cc$!);
91 } elsif ($fnm =~ /\.c$/) {
92 if ($fnm !~ m!\blanguages/compiler/! &&
93 $fnm !~ m!/lemon\.c$! &&
94 $fnm !~ m!/xapdep\.c$!) {
98 } elsif ($fnm =~ /\.h$/) {
99 if ($fnm !~ m!\binclude/xapian/intrusive_ptr\.h! &&
100 $fnm !~ m!\blanguages/compiler/! &&
101 $fnm !~ m!\bcommon/msvc_dirent\.h$! &&
102 $fnm !~ m!\bmatcher/heap\.h$!) {
104 $want_tabs = 1 unless ($fnm =~ m!/omega/cdb!);
106 } elsif ($fnm =~ /\.py(?:\.in)?$/) {
109 } elsif ($fnm =~ m!(?:^|/)ChangeLog\b!) {
115 $fnm !~ m!\bcommon/msvc_dirent\.! &&
116 $fnm !~ m!/lemon\.c$! &&
117 $fnm !~ m!/queryparser\.lt$! &&
119 $fnm !~ m!/testdata/etext\.txt$!;
122 # print STDERR "$fnm: lang=" . ($lang // "UNKNOWN") . "\n";
125 my $pre3 = substr($_, 0, 3);
126 if ($pre3 eq '@@ ') {
127 /^\@\@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)?\b/ and $lineno = $1; # line counts are optional: unified diff writes a one-line range as "+N", not "+N,1"
131 if ($pre3 eq '---' || $pre3 eq '+++') {
135 if (defined $lang && ($lang eq 'c++' || $lang eq 'h' || $lang eq 'c')) {
136 # Uncomment commented out parameter names: foo(int /*bar*/) -> foo(int bar)
137 s!/\*([A-Za-z_][A-Za-z_0-9]*)\*/([,)])!$1$2!g;
139 # Check for comments without a space before the comment text.
140 if (m!^\+.*\s/[*/]{1,2}[A-Za-z0-9]!) {
141 diagnostic
('error', "Missing space between comment characters and comment text", $fullline);
145 if (s!/(?:\*.*?\*/|/.*)!!g) {
152 # Trim content of comments ending on this line:
153 if (s!^(.).*\*/!$1*/!) {
159 # Drop comment content for "*" continuation lines (when /* isn't in hunk):
160 s/^(.)(\s*\*).*/$1$2/;
162 } elsif (defined $lang && $lang eq 'py') {
169 # Replace multiple spaces before line continuation marker:
172 if (defined $lang && ($lang eq 'c++' || $lang eq 'h' || $lang eq 'c') && !(/^[-+]\s*\#/)) {
173 if (substr($_, 0, 1) eq '+') {
174 my $expandedline = '';
175 for my $i (1..length($fullline) - 1) {
176 my $ch = substr($fullline, $i, 1);
178 $expandedline .= ('.' x
(8 - length($expandedline) % 8));
180 $expandedline .= $ch;
183 chomp($expandedline);
184 if (length($expandedline) > 80) {
185 diagnostic
('error', "Line extends beyond column 80 (to column ".length($expandedline).")", $fullline);
188 if (m
,^\
+\s
+LOGCALL
(?
:_
[A
-Z0
-9]+)*\
([^"]*"[^"]*(?<!operator)\(,) {
189 diagnostic('error', "Don
't include parentheses in debug logging method/class name", $fullline);
191 # Replace string literals containing escaped quotes:
193 my $quote = substr($_, $-[0], 1);
197 QUOTELOOP: while (1) {
198 if ($i >= length($_)) {
199 $_ = substr($_, 0, $start) . "X
\n";
202 my $c = substr($_, $i, 1);
204 $_ = substr($_, 0, $start) . "X
" . substr($_, $i);
206 # See if there's another string after this one:
207 while ($i != length($_)) {
208 $c = substr($_, $i, 1);
210 if ($c eq '"' || $c eq "'") {
221 $c = substr($_, $i, 1);
223 ++$i while (substr($_, $i, 1) =~ /^[A-Fa-f0-9]$/);
225 } elsif ($c =~ /^[0-7]/) {
227 ++$i while ($i - $j <= 3 && substr($_, $i, 1) =~ /^[0-7]$/);
229 } elsif ($c eq '"' || $c eq "'") {
238 if ($check_trailing && /^\+.*[ \t]$/) {
239 diagnostic('error', "added
/changed line has trailing whitespace
", $fullline);
241 if ($check_space_tab && /^\+.* \t/) {
242 diagnostic('error', "added
/changed line has space before tab
", $fullline);
244 if ($want_tabs == 1 and /^\+\t* {8}/) {
245 diagnostic('error', "added
/changed line uses spaces
for indentation rather than tab
", $fullline);
247 if (!$want_tabs and /^\+ *\t/) {
248 diagnostic('error', "added
/changed line uses tab
for indentation rather than spaces
", $fullline);
250 if ((!defined $lang || $lang ne 'changelog') && $fullline =~ /^([-+]).*\bFIX(?:ME)\b/) {
251 # Break up the string in the regexp above and messages below to avoid
252 # this triggering on its own code!
254 # Not an error, but interesting information.
255 diagnostic('info', "FIX
"."ME removed
", $fullline);
257 # Not an error, but not good.
258 diagnostic('warning', "FIX
"."ME added
", $fullline);
261 if (defined $lang && $lang ne 'changelog' && /^\+.*\\([abcefp]|brief|code|deprecated|endcode|exception|file|internal|li|param|private|return|todo)\b/) {
262 diagnostic('error', "Doxygen command
'\\$1' introduced by
'\\' not '\@'", $fullline);
264 if (defined $lang && $lang ne 'changelog' && /^\+.*@\s+([abcefp]|brief|code|deprecated|endcode|exception|file|internal|li|param|private|return|todo)\b/) {
265 diagnostic('error', "Broken Doxygen command
: whitespace between
'\@' and '$1'", $fullline);
267 if (defined $lang && ($lang eq 'c++' || $lang eq 'h' || $lang eq 'c')) {
268 if ($check_space_tab && /^\+( (?:| | | ))[^ \t#].*(?:[^)];|[^);,])\n/) {
269 # Exclude lines ending ');', ')', or ',' to avoid reporting for wrapped function arguments.
270 diagnostic('error', "line indented by
".length($1)." spaces
", $fullline);
272 if (m!^\+\s*(case|catch|class|do|for|if|namespace|struct|switch|try|union)\b([^ ]| \s)!) {
273 diagnostic('error', "'$1' not followed by exactly one space
", $fullline);
275 if (m!^\+.*;[^\s\\]!) {
276 diagnostic('error', "Missing space after
';'", $fullline);
278 if (m!^\+.*[^(;]\s;!) {
279 # Stuff like this is OK: for ( ; ; ) {
280 # though for that exact case I'd suggest: while (true) {
281 diagnostic('error', "Whitespace before
';'", $fullline);
283 if (m!^\+.*?\b(return)\b([^ ;]| \s)!) {
284 diagnostic('error', "'$1' not followed by exactly one space
", $fullline);
286 if (m!^\+.*?\b(else)\b([^ \n]| \s)!) {
287 diagnostic('error', "'$1' not followed by exactly one space
", $fullline);
289 if (m!^\+.*?\b(while)\b([^ ]| \s)!) {
290 diagnostic('error', "'$1' not followed by exactly one space
", $fullline);
292 if (m!^\+.*?(?:}|}\s{2,}|}\t|^[^}]*)\b(catch)\b!) {
293 diagnostic('error', "'$1' not preceded by exactly
'} '", $fullline);
295 if (m!^\+.*?(?:}|}\s{2,}|}\t)\b(else|while)\b!) {
296 diagnostic('error', "'}' and '$1' not separated by exactly one space
", $fullline);
298 if (m!^\+.*\((?: [^;]|\t)!) {
299 # Allow: for ( ; i != 10; ++i)
300 diagnostic('error', "Whitespace after
'('", $fullline);
302 if (m!^\+.*\H.*\h\)!) {
303 diagnostic('error', "Whitespace before
')'", $fullline);
305 if (m!^\+.*;\s*(\w+)([-+]{2})\)!) {
306 diagnostic('error', "Prefer
'$2$1' to
'$1$2'", $fullline);
308 if (m,^\+\s*[^#].*[\w)](?!-[->]|\+\+)((?:\&\&|\|\||<<|>>|[-+/*%~=<>!&|^])=?|[?]),) {
312 if (substr($_, $pre[1] - 8, 8) eq 'operator') {
314 } elsif ($op eq '>' && substr($_, 0, $pre[1]) =~ /[A-Za-z0-9_]</) {
315 # y = static_cast<char>(x);
316 } elsif ($op eq '>') {
317 } elsif ($op eq '<' && substr($_, $pre[1] - 1, 1) =~ /^[A-Za-z0-9_]$/ && substr($_, $post[1]) =~ />/) {
318 # y = static_cast<char>(x);
319 } elsif ($op eq '<' &&
320 substr($_, 0, $pre[1]) =~ /\b(?:list|map|multimap|multiset|priority_queue|set|template|unordered_map|unordered_set|vector)$/) {
321 # y = priority_queue<Foo*,
323 # template<typename A,
325 } elsif ($op eq '&&' && substr($_, $pre[1] - 4, 4) eq 'auto') {
327 } elsif (($op eq '<<' || $op eq '>>') &&
328 substr($_, 0, $pre[1]) =~ /\b(?:0x[0-9a-fA-F]+|[0-9]+)$/ &&
329 substr($_, $post[1]) =~ /^(?:0x[0-9a-fA-F]+|[0-9]+)\b/) {
331 } elsif (($op eq '-' || $op eq '+') &&
332 substr($_, 0, $pre[1]) =~ /[0-9]\.?e$/) {
334 } elsif ($op eq '>>' &&
335 /[A-Za-z0-9_]<.+</) {
336 # vector<vector<int>> v;
337 } elsif ($op =~ /^[*&|]$/) {
338 # FIXME: *: const char* x;
339 # FIXME: &: const char& x;
342 diagnostic('error', "Missing space before
'$op'", $fullline);
345 if (m@^\+\s*[^#\s].*?((?:\&\&|\|\||<<|>>|[-+/*%~=<>!&|^])=?|[?:,])(?<!(?:-[->]|\+\+|::))(?:[\w\(\.\{!"']| \s)@) {
349 if ($op eq '~' && substr($_, $post[1]) =~ /^[A-Za-z][A-Za-z0-9_]*\(/) {
350 # Destructor - e.g. ~Foo();
351 } elsif (($op eq '-' || $op eq '+' || $op eq '!' || $op eq '~') &&
352 substr($_, 0, $pre[1]) =~ m@(?:[-+/*%~=<>&|,;?:] |[\[(]|\b(?:return|case) |^\+\s*)$@) {
353 # Unary -, +, !, ~: e.g. foo = +1; bar = x * (-y); baz = a * -b;
354 } elsif ($op eq ',' && (
355 /\b(?:AssertRel(?:Paranoid)?|TEST_REL)\(/ ||
358 } elsif ($op eq '>>' &&
359 /[A-Za-z0-9_]<.+</) {
360 # vector<vector<int>>&
361 } elsif ($op =~ /^[*&<>|]$/) {
362 # FIXME: *: const char *x;
363 # FIXME: &: const char &x;
364 # FIXME: < >: y = static_cast<char>(x);
366 } elsif (substr($_, $pre[1] - 8, 8) eq 'operator
') {
368 } elsif (($op eq '<<' || $op eq '>>') &&
369 substr($_, 0, $pre[1]) =~ /\b(?:0x[0-9a-fA-F]+|[0-9]+)$/ &&
370 substr($_, $post[1]) =~ /^(?:0x[0-9a-fA-F]+|[0-9]+)\b/) {
372 } elsif (($op eq '-' || $op eq '+') &&
373 substr($_, 0, $pre[1]) =~ /[0-9]\.?e$/) {
376 diagnostic('error
', "Should have exactly one space after '$op'", $fullline);
380 diagnostic('error
', "Extra ';' at end of line", $fullline);
382 if (m@^\+[^#]*?[^#\h] +(,|->)@) {
383 diagnostic('error
', "Space before '$1'", $fullline);
385 if (m,^\+[^#]*?[^#\h] ,) {
386 diagnostic('error
', "Multiple spaces", $fullline);
388 if (m!^\+(?:.*[;{])?\s*/[/*]{1,2}\w!) {
389 diagnostic('error
', "added/changed line has comment without whitespace before the text", $fullline);
392 diagnostic('error
', "No space between ')' and '{'", $fullline);
394 if ($fnm !~ m!/(?:md5|posixy_wrapper|perftest)\.cc$! &&
395 m,^\+.*[^\w\.>]([a-z][a-z0-9]*[A-Z]\w*),) {
397 if ($symbol eq 'gzFile
' || $symbol eq 'uInt
' || $symbol =~ /^(?:de|in)flate[A-Z]/) {
398 # Whitelist symbols from APIs we use.
399 } elsif ($symbol =~ /^[gs]et[A-Z]$/) {
400 # For now, allow setD(), etc.
401 } elsif ($symbol =~ /^h(?:File|Read|Write|Pipe|Client)$/ || $symbol eq 'fdwCtrlType
' || $symbol eq 'pShutdownSocket
') {
402 # Platform specific names, allow for now.
404 diagnostic('error
', "camelCase identifier '$1' - Xapian coding convention is to use lower case and underscores for variables and functions, and CamelCase for class names", $fullline);
407 if ($lineno == 1 && m!^\+!) {
408 if ($fullline =~ m!^\+/\*\* \@file (\S+)!) { # test the raw line: it starts with '+', so an unprefixed '^/' could never match
410 if ((length($fnm) == length($at_file) || # \@file must equal the filename, or be a '/'-delimited suffix of it
411 (length($fnm) > length($at_file) && substr($fnm, -length($at_file) - 1, 1) eq '/')) && # explicit parens: "length $x - 1" would parse as length($x - 1)
412 substr($fnm, -length($at_file)) eq $at_file) { # text comparison now applies to both length cases ('&&' binds tighter than '||')
415 diagnostic('error
', "\@file doesn't match filename
", $fullline);
418 diagnostic('error', "\
@file missing
", $fullline);
421 if (/^\+.*\b(?:class|struct)\b.*:\s*$/) {
422 diagnostic('error', "Inheritance list
split after
':', should be before
", $fullline);
424 # Try to distinguish ternary operator (?:) correctly split after ":" vs
425 # constructor initialiser list incorrectly split after ":".
426 my $last_in_ternary = $in_ternary;
427 $in_ternary = / \?(?: |$)/;
428 if (!$last_in_ternary && !$in_ternary && /^\+.*\)\s*:\s*$/) {
429 diagnostic('error', "Constructor initialiser list
split after
':', should be before
", $fullline);
431 if (m,^\+\s+([-+/%^]|[&|]{2})\s,) {
432 diagnostic('error', "Expression
split before operator
'$1', should be after
", $fullline);
435 if (m!^\+\s*#\s*(ifndef|define|endif\s*/[*/])\s+((?:[A-Z]+_INCLUDED)?_?\w+_[Hh]\b)!) {
436 my ($type, $guard) = ($1, $2);
438 if (!defined $header_guard_macro) {
439 if ($type eq 'ifndef') {
440 $header_guard_macro = [$type, $guard];
441 my $expected_guard = uc $fnm;
442 $expected_guard =~ s![-.]!_!g;
444 if (length($expected_guard) > length($guard) &&
445 substr($expected_guard, -length($guard) - 1, 1) eq '/' &&
446 substr($expected_guard, -length($guard)) eq $guard) {
449 for my $i (1 .. length($guard)) {
450 my $ch_e = substr($expected_guard, -$i, 1);
451 my $ch_g = substr($guard, -$i, 1);
452 next if ($ch_e eq $ch_g);
453 last if ($ch_e ne '/' || $ch_g ne '_');
458 diagnostic('error', "include guard macro should match filename
", $fullline);
460 my $prefix = 'XAPIAN_INCLUDED_';
461 if ($fnm =~ m!.*omega/(?:.*/)?!) {
462 $prefix = 'OMEGA_INCLUDED_';
464 #} elsif ($fnm =~ s!.*xapian-core/.*/!!) {
465 # $expected_guard = "XAPIAN_INCLUDED_" . $expected_guard;
466 #} elsif ($fnm =~ s!.*xapian-letor/.*/!!) {
467 #$expected_guard = "XAPIAN_INCLUDED_" . $expected_guard;
468 if (defined $cut && $cut == -1) {
469 diagnostic('error', "include guard macro should
use prefix
'$prefix'", $fullline);
470 } elsif (defined $cut && substr($guard, 0, length($guard) - $cut + 1) ne $prefix) {
471 diagnostic('error', "include guard macro should
use prefix
'$prefix'", $fullline);
472 } elsif ($guard !~ /^\Q$prefix\E/) {
473 diagnostic('error', "include guard macro should
use prefix
'$prefix'", $fullline);
477 if (!($type eq 'define' && $header_guard_macro->[0] ne 'ifndef')) {
478 my $expected_guard = $header_guard_macro->[1];
479 $header_guard_macro->[0] = $type;
480 if ($guard ne $expected_guard) {
481 diagnostic('error', "include guard macro should be
$expected_guard", $fullline);
487 if (m!^\+\s*#\s*define\s+[A-Z]+_INCLUDED_!) { # '+' is a quantifier here; the previous '\+' required a literal plus sign, so the check could never fire
488 diagnostic('error', "include guard macro
defined in non
-header
", $fullline);
491 } elsif (defined $lang && $lang eq 'py') {
493 diagnostic('error', "';' at end of line of python code
", $fullline);
496 if ($fnm !~ m!xapian-check-patch|ChangeLog|NEWS|stemming/.*/(?:voc|output)\.txt$!) {
497 if (/^\+.*?\b(xapain|the the|initialsing|ipv5|outputing)\b/i ||
498 # Cases which just need to be the prefix of a word
499 /^\+.*?\b((?:deafult|parm|peform|acessor|comptib|seach|seperat|seprat|separater|iteratat|calulat|delimitor|charactor)[a-z]*\b)/i ||
500 # Case-sensitive cases
501 /^\+.*?\b(and and)\b/) {
502 diagnostic('error', "Typo
'$1'", $fullline);
506 my $first_char = substr($fullline, 0, 1);
507 if ($first_char eq ' ') {
509 } elsif ($first_char eq '+') {
512 } elsif ($first_char eq '-') {
514 } elsif ($first_char eq '\\') {
515 # "\ No newline at end of file" - if preceded by a "+" line, this means
516 # that the patch leaves the file missing a newline at the end.
517 if ($last_first_char eq '+') {
518 diagnostic('error', 'No newline at end of file');
521 $last_first_char = $first_char;
523 if (scalar keys %count) {
524 for (sort keys %count) {
525 print STDERR "$_ count
:\t$count{$_}\n";
529 print STDERR <<"__END__
";
530 Files patched:\t$files
531 Lines added:\t$add_lines
532 Lines removed:\t$del_lines
534 exit(exists $count{'error'} ? 1 : 0);