Eliminate broken PostListCursor for non-const HoneyTable
[xapian.git] / xapian-maintainer-tools / xapian-check-patch
blob32c2ab9fd1d4a9eb0f01176356c5b0d88a200b8f
1 #! /usr/bin/perl -w
2 # Copyright (c) 2007-2017 Olly Betts
4 # Permission is hereby granted, free of charge, to any person obtaining a copy
5 # of this software and associated documentation files (the "Software"), to
6 # deal in the Software without restriction, including without limitation the
7 # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 # sell copies of the Software, and to permit persons to whom the Software is
9 # furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice shall be included in
12 # all copies or substantial portions of the Software.
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 # IN THE SOFTWARE.
22 require 5.000;
23 use strict;
24 use POSIX;
26 if (defined $ARGV[0] && $ARGV[0] eq '--help') {
27 print <<END;
28 Syntax: $0 [PATCH]...
30 Nit-pick Xapian patches.
32 A patch can be supplied on stdin, or one or more patch files listed on the
33 command line.
35 Produces output suitable for use with vim's quick-fix mode, and similar
36 features in other editors.
38 Example usage:
40 git diff master.. | xapian-check-patch > tmp.qf
41 vim -q tmp.qf
42 END
43 exit 0;
46 my ($fnm, $lineno);
47 my %count;
# Emit one diagnostic on stdout in "FILE:LINE: TYPE: MESSAGE" form and count
# it under its type in %count.
#
#   $type     - diagnostic category (used as the key in %count).
#   $msg      - description of the problem.
#   $fullline - optional text of the offending line; when given it is appended
#               as-is after ": " (no newline is added), otherwise the message
#               is terminated with a newline.
#
# The position reported comes from the file-level $fnm and $lineno.
sub diagnostic {
    my ($type, $msg, $fullline) = @_;
    my $report = "$fnm:$lineno: $type: $msg";
    $report .= defined $fullline ? ": $fullline" : "\n";
    print $report;
    ++$count{$type};
}
60 my $add_lines = 0;
61 my $del_lines = 0;
62 my $files = 0;
63 # SVN property changes don't have an "Index: [...]" line.
64 my $want_tabs = -1;
65 my $check_trailing = 0;
66 my $check_space_tab = 0;
67 my $in_comment = 0;
68 my $lang;
69 my $header_guard_macro;
70 my $last_first_char = '';
71 my $in_ternary;
72 while (<>) {
73 if (/^Index: (.+)/ || m!^diff --git a/.+ b/(.+)!) {
74 ++$files;
75 $fnm = $1;
76 $lineno = 1;
77 $lang = undef;
78 $in_comment = 0;
79 $header_guard_macro = undef;
80 $in_ternary = 0;
81 # Don't know!
82 $want_tabs = -1;
83 if ($fnm =~ /\.cc$/) {
84 if ($fnm !~ m!\b(?:cdb|portability/)! &&
85 $fnm !~ m!\bcommon/getopt\.cc$! &&
86 $fnm !~ m!\bomega/md5\.cc$! &&
87 $fnm !~ m!\bcommon/msvc_dirent\.cc$!) {
88 $lang = 'c++';
89 $want_tabs = 1 unless ($fnm =~ m!\blanguages/steminternal\.cc$!);
91 } elsif ($fnm =~ /\.c$/) {
92 if ($fnm !~ m!\blanguages/compiler/! &&
93 $fnm !~ m!/lemon\.c$! &&
94 $fnm !~ m!/xapdep\.c$!) {
95 $lang = 'c';
96 $want_tabs = 1;
98 } elsif ($fnm =~ /\.h$/) {
99 if ($fnm !~ m!\binclude/xapian/intrusive_ptr\.h! &&
100 $fnm !~ m!\blanguages/compiler/! &&
101 $fnm !~ m!\bcommon/msvc_dirent\.h$! &&
102 $fnm !~ m!\bmatcher/heap\.h$!) {
103 $lang = 'h';
104 $want_tabs = 1 unless ($fnm =~ m!/omega/cdb!);
106 } elsif ($fnm =~ /\.py(?:\.in)?$/) {
107 $lang = 'py';
108 $want_tabs = 0;
109 } elsif ($fnm =~ m!(?:^|/)ChangeLog\b!) {
110 $lang = 'changelog';
111 $want_tabs = 1;
113 $check_trailing =
114 $fnm !~ /\.sbl$/ &&
115 $fnm !~ m!\bcommon/msvc_dirent\.! &&
116 $fnm !~ m!/lemon\.c$! &&
117 $fnm !~ m!/queryparser\.lt$! &&
118 $fnm !~ m!\bcdb! &&
119 $fnm !~ m!/testdata/etext\.txt$!;
120 $check_space_tab =
121 $fnm !~ /\.sbl$/;
122 # print STDERR "$fnm: lang=" . ($lang // "UNKNOWN") . "\n";
123 next;
# Classify the line by its first three characters so that hunk headers and
# the "---"/"+++" file header lines are skipped before any content checks.
my $pre3 = substr($_, 0, 3);
if ($pre3 eq '@@ ') {
    # Hunk header: pick up the starting line number on the "+" side.  The
    # ",COUNT" part is optional on both sides - unified diffs omit it when
    # the count is 1 (e.g. "@@ -3 +3 @@") - so it must not be required, or
    # $lineno is left stale for single-line hunks.
    /^\@\@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)?\b/ and $lineno = $1;
    # Reset the comment-tracking state at the start of each hunk.
    $in_comment = 0;
    next;
}
# NOTE(review): this also skips added/removed content lines which themselves
# begin with "++" or "--" (they appear as "+++..." / "---..." in the patch).
if ($pre3 eq '---' || $pre3 eq '+++') {
    next;
}
134 my $fullline = $_;
135 if (defined $lang && ($lang eq 'c++' || $lang eq 'h' || $lang eq 'c')) {
136 # Uncomment commented out parameter names: foo(int /*bar*/) -> foo(int bar)
137 s!/\*([A-Za-z_][A-Za-z_0-9]*)\*/([,)])!$1$2!g;
139 # Check for comments without a space before the comment text.
140 if (m!^\+.*\s/[*/]{1,2}[A-Za-z0-9]!) {
141 diagnostic('error', "Missing space between comment characters and comment text", $fullline);
144 # Trim comments:
145 if (s!/(?:\*.*?\*/|/.*)!!g) {
146 s/\s+$//;
148 if (s!/\*.*!!g) {
149 s/\s+$//;
150 $in_comment = 1;
152 # Trim content of comments ending on this line:
153 if (s!^(.).*\*/!$1*/!) {
154 $in_comment = 0;
156 if ($in_comment) {
157 $_ = '';
158 } else {
159 # Drop comment content for "*" continuation lines (when /* isn't in hunk):
160 s/^(.)(\s*\*).*/$1$2/;
162 } elsif (defined $lang && $lang eq 'py') {
163 # Trim comments:
164 if (s!#.*!!g) {
165 s/\s+$//;
169 # Replace multiple spaces before line continuation marker:
170 s! +\\$! \\!;
172 if (defined $lang && ($lang eq 'c++' || $lang eq 'h' || $lang eq 'c') && !(/^[-+]\s*\#/)) {
173 if (substr($_, 0, 1) eq '+') {
174 my $expandedline = '';
175 for my $i (1..length($fullline) - 1) {
176 my $ch = substr($fullline, $i, 1);
177 if ($ch eq "\t") {
178 $expandedline .= ('.' x (8 - length($expandedline) % 8));
179 } else {
180 $expandedline .= $ch;
183 chomp($expandedline);
184 if (length($expandedline) > 80 && !/^\+[ \t]*LOGCALL/) {
185 diagnostic('error', "Line extends beyond column 80 (to column ".length($expandedline).")", $fullline);
188 if (m,^\+\s+LOGCALL(?:_[A-Z0-9]+)*\([^"]*"[^"]*(?<!operator)\(,) {
189 diagnostic('error', "Don't include parentheses in debug logging method/class name", $fullline);
191 # Replace string literals containing escaped quotes:
192 if (/['"]/) {
193 my $quote = substr($_, $-[0], 1);
194 my $start = $+[0];
195 my $i = $start;
196 my $esc = 0;
197 QUOTELOOP: while (1) {
198 if ($i >= length($_)) {
199 $_ = substr($_, 0, $start) . "X\n";
200 last;
202 my $c = substr($_, $i, 1);
203 if ($c eq $quote) {
204 $_ = substr($_, 0, $start) . "X" . substr($_, $i);
205 $i = $start + 2;
206 # See if there's another string after this one:
207 while ($i != length($_)) {
208 $c = substr($_, $i, 1);
209 ++$i;
210 if ($c eq '"' || $c eq "'") {
211 $quote = $c;
212 $start = $i;
213 $esc = 0;
214 next QUOTELOOP;
217 last;
219 if ($c eq '\\') {
220 ++$i;
221 $c = substr($_, $i, 1);
222 if ($c eq 'x') {
223 ++$i while (substr($_, $i, 1) =~ /^[A-Fa-f0-9]$/);
224 next;
225 } elsif ($c =~ /^[0-7]/) {
226 my $j = $i;
227 ++$i while ($i - $j <= 3 && substr($_, $i, 1) =~ /^[0-7]$/);
228 next;
229 } elsif ($c eq '"' || $c eq "'") {
230 ++$esc;
233 ++$i;
# Whitespace checks on added/changed lines.  Which of these apply is decided
# by $check_trailing, $check_space_tab and $want_tabs ($want_tabs is 1 when
# tab indentation is wanted, 0 when spaces are wanted, and -1 when unknown,
# in which case neither indentation check fires).
diagnostic('error', "added/changed line has trailing whitespace", $fullline)
    if $check_trailing && /^\+.*[ \t]$/;

diagnostic('error', "added/changed line has space before tab", $fullline)
    if $check_space_tab && /^\+.* \t/;

# Eight spaces in the indentation should have been a tab.
diagnostic('error', "added/changed line uses spaces for indentation rather than tab", $fullline)
    if $want_tabs == 1 && /^\+\t* {8}/;

diagnostic('error', "added/changed line uses tab for indentation rather than spaces", $fullline)
    if !$want_tabs && /^\+ *\t/;
# Report the to-do marker being added or removed anywhere except in a
# ChangeLog.  The marker is split up in the regexp and in the messages so
# that this script doesn't trigger on its own code.
unless (defined $lang && $lang eq 'changelog') {
    if ($fullline =~ /^([-+]).*\bFIX(?:ME)\b/) {
        # Removal is merely interesting ('info'); an addition is not an
        # error, but not good either ('warning').
        my ($type, $msg) = $1 eq '-'
            ? ('info', "FIX"."ME removed")
            : ('warning', "FIX"."ME added");
        diagnostic($type, $msg, $fullline);
    }
}
# Doxygen command checks, applied to added lines of any recognised language
# other than ChangeLog.
if (defined $lang && $lang ne 'changelog') {
    # Commands should be written with '@', not a backslash.
    if (/^\+.*\\([abcefp]|brief|code|deprecated|endcode|exception|file|internal|li|param|private|return|todo)\b/) {
        diagnostic('error', "Doxygen command '\\$1' introduced by '\\' not '\@'", $fullline);
    }
    # Whitespace between the '@' and the command name breaks the command.
    if (/^\+.*@\s+([abcefp]|brief|code|deprecated|endcode|exception|file|internal|li|param|private|return|todo)\b/) {
        diagnostic('error', "Broken Doxygen command: whitespace between '\@' and '$1'", $fullline);
    }
}
267 if (defined $lang && ($lang eq 'c++' || $lang eq 'h' || $lang eq 'c')) {
268 if ($check_space_tab && /^\+( (?:| | | ))[^ \t#].*(?:[^)];|[^);,])\n/) {
269 # Exclude lines ending ');', ')', or ',' to avoid reporting for wrapped function arguments.
270 diagnostic('error', "line indented by ".length($1)." spaces", $fullline);
272 if (m!^\+\s*(case|catch|class|do|for|if|namespace|struct|switch|try|union)\b([^ ]| \s)!) {
273 diagnostic('error', "'$1' not followed by exactly one space", $fullline);
275 if (m!^\+.*;[^\s\\]!) {
276 diagnostic('error', "Missing space after ';'", $fullline);
278 if (m!^\+.*[^(;]\s;!) {
279 # Stuff like this is OK: for ( ; ; ) {
280 # though for that exact case I'd suggest: while (true) {
281 diagnostic('error', "Whitespace before ';'", $fullline);
283 if (m!^\+.*?\b(return)\b([^ ;]| \s)!) {
284 diagnostic('error', "'$1' not followed by exactly one space", $fullline);
286 if (m!^\+.*?\b(else)\b([^ \n]| \s)!) {
287 diagnostic('error', "'$1' not followed by exactly one space", $fullline);
289 if (m!^\+.*?\b(while)\b([^ ]| \s)!) {
290 diagnostic('error', "'$1' not followed by exactly one space", $fullline);
292 if (m!^\+.*?(?:}|}\s{2,}|}\t|^[^}]*)\b(catch)\b!) {
293 diagnostic('error', "'$1' not preceded by exactly '} '", $fullline);
295 if (m!^\+.*?(?:}|}\s{2,}|}\t)\b(else|while)\b!) {
296 diagnostic('error', "'}' and '$1' not separated by exactly one space", $fullline);
298 if (m!^\+.*\((?: [^;]|\t)!) {
299 # Allow: for ( ; i != 10; ++i)
300 diagnostic('error', "Whitespace after '('", $fullline);
302 if (m!^\+.*\H.*\h\)!) {
303 diagnostic('error', "Whitespace before ')'", $fullline);
305 if (m!^\+.*;\s*(\w+)([-+]{2})\)!) {
306 diagnostic('error', "Prefer '$2$1' to '$1$2'", $fullline);
308 if (m,^\+\s*[^#].*[\w)](?!-[->]|\+\+)((?:\&\&|\|\||<<|>>|[-+/*%~=<>!&|^])=?|[?]),) {
309 my @pre = @-;
310 my @post = @+;
311 my $op = $1;
312 if (substr($_, $pre[1] - 8, 8) eq 'operator') {
313 # operator*() etc
314 } elsif ($op eq '>' && substr($_, 0, $pre[1]) =~ /[A-Za-z0-9_]</) {
315 # y = static_cast<char>(x);
316 } elsif ($op eq '>') {
317 } elsif ($op eq '<' && substr($_, $pre[1] - 1, 1) =~ /^[A-Za-z0-9_]$/ && substr($_, $post[1]) =~ />/) {
318 # y = static_cast<char>(x);
319 } elsif ($op eq '<' &&
320 substr($_, 0, $pre[1]) =~ /\b(?:list|map|multimap|multiset|priority_queue|set|template|unordered_map|unordered_set|vector)$/) {
321 # y = priority_queue<Foo*,
322 # Bar>;
323 # template<typename A,
324 # typename B>
325 } elsif ($op eq '&&' && substr($_, $pre[1] - 4, 4) eq 'auto') {
326 # auto&& x
327 } elsif (($op eq '<<' || $op eq '>>') &&
328 substr($_, 0, $pre[1]) =~ /\b(?:0x[0-9a-fA-F]+|[0-9]+)$/ &&
329 substr($_, $post[1]) =~ /^(?:0x[0-9a-fA-F]+|[0-9]+)\b/) {
330 # 0x00b1<<26
331 } elsif (($op eq '-' || $op eq '+') &&
332 substr($_, 0, $pre[1]) =~ /[0-9]\.?e$/) {
333 # 1.2e-3, 7.e+3
334 } elsif ($op eq '>>' &&
335 /[A-Za-z0-9_]<.+</) {
336 # vector<vector<int>> v;
337 } elsif ($op =~ /^[*&|]$/) {
338 # FIXME: *: const char* x;
339 # FIXME: &: const char& x;
340 # FIXME: |: FOO|BAR
341 } else {
342 diagnostic('error', "Missing space before '$op'", $fullline);
345 if (m@^\+\s*[^#\s].*?((?:\&\&|\|\||<<|>>|[-+/*%~=<>!&|^])=?|[?:,])(?<!(?:-[->]|\+\+|::))(?:[\w\(\.\{!"']| \s)@) {
346 my @pre = @-;
347 my @post = @+;
348 my $op = $1;
349 if ($op eq '~' && substr($_, $post[1]) =~ /^[A-Za-z][A-Za-z0-9_]*\(/) {
350 # Destructor - e.g. ~Foo();
351 } elsif (($op eq '-' || $op eq '+' || $op eq '!' || $op eq '~') &&
352 substr($_, 0, $pre[1]) =~ m@(?:[-+/*%~=<>&|,;?:] |[\[(]|\b(?:return|case) |^\+\s*)$@) {
353 # Unary -, +, !, ~: e.g. foo = +1; bar = x * (-y); baz = a * -b;
354 } elsif ($op eq ',' && (
355 /\b(?:AssertRel(?:Paranoid)?|TEST_REL)\(/ ||
356 /{[^()]*}/)) {
357 # AssertRel(a,<,b);
358 } elsif ($op eq '>>' &&
359 /[A-Za-z0-9_]<.+</) {
360 # vector<vector<int>>&
361 } elsif ($op =~ /^[*&<>|]$/) {
362 # FIXME: *: const char *x;
363 # FIXME: &: const char &x;
364 # FIXME: < >: y = static_cast<char>(x);
365 # FIXME: |: FOO|BAR
366 } elsif (substr($_, $pre[1] - 8, 8) eq 'operator') {
367 # operator==() etc
368 } elsif (($op eq '<<' || $op eq '>>') &&
369 substr($_, 0, $pre[1]) =~ /\b(?:0x[0-9a-fA-F]+|[0-9]+)$/ &&
370 substr($_, $post[1]) =~ /^(?:0x[0-9a-fA-F]+|[0-9]+)\b/) {
371 # 0x00b1<<26
372 } elsif (($op eq '-' || $op eq '+') &&
373 substr($_, 0, $pre[1]) =~ /[0-9]\.?e$/) {
374 # 1.2e-3, 7.e+3
375 } else {
376 diagnostic('error', "Should have exactly one space after '$op'", $fullline);
379 if (/^\+.*;;\s*$/) {
380 diagnostic('error', "Extra ';' at end of line", $fullline);
382 if (m@^\+[^#]*?[^#\h] +(,|->)@) {
383 diagnostic('error', "Space before '$1'", $fullline);
385 if (m,^\+[^#]*?[^#\h] ,) {
386 diagnostic('error', "Multiple spaces", $fullline);
388 if (m!^\+(?:.*[;{])?\s*/[/*]{1,2}\w!) {
389 diagnostic('error', "added/changed line has comment without whitespace before the text", $fullline);
391 if (m!^\+.*?\)\{!) {
392 diagnostic('error', "No space between ')' and '{'", $fullline);
394 if ($fnm !~ m!/(?:md5|posixy_wrapper|perftest)\.cc$! &&
395 m,^\+.*[^\w\.>]([a-z][a-z0-9]*[A-Z]\w*),) {
396 my $symbol = $1;
397 if ($symbol eq 'gzFile' || $symbol eq 'uInt' || $symbol =~ /^(?:de|in)flate[A-Z]/) {
398 # Whitelist symbols from APIs we use.
399 } elsif ($symbol =~ /^[gs]et[A-Z]$/) {
400 # For now, allow setD(), etc.
401 } elsif ($symbol =~ /^h(?:File|Read|Write|Pipe|Client)$/ || $symbol eq 'fdwCtrlType' || $symbol eq 'pShutdownSocket') {
402 # Platform specific names, allow for now.
403 } else {
404 diagnostic('error', "camelCase identifier '$1' - Xapian coding convention is to use lower case and underscores for variables and functions, and CamelCase for class names", $fullline);
# An added/changed first line should be a Doxygen "/** @file NAME" comment,
# where NAME is the patched filename or a trailing path suffix of it.
#
# This inspects $fullline rather than $_: by this point comments have been
# stripped from $_ (and $_ is emptied while inside an unterminated comment),
# and $_ starts with the '+' marker anyway, so matching "/** @file" against
# $_ could never succeed.
if ($lineno == 1 && $fullline =~ /^\+/) {
    if ($fullline =~ m!^\+/\*\* \@file (\S+)!) {
        my $at_file = $1;
        my $at_len = length($at_file);
        my $fnm_len = length($fnm);
        # NAME must be the tail of $fnm, starting either at the very start
        # of $fnm or immediately after a '/'.  length() is called with
        # explicit parentheses because "length $x - 1" parses as
        # length($x - 1).
        my $on_boundary = $fnm_len == $at_len ||
            ($fnm_len > $at_len &&
             substr($fnm, -$at_len - 1, 1) eq '/');
        if (!($on_boundary && substr($fnm, -$at_len) eq $at_file)) {
            diagnostic('error', "\@file doesn't match filename", $fullline);
        }
    } else {
        diagnostic('error', "\@file missing", $fullline);
    }
}
421 if (/^\+.*\b(?:class|struct)\b.*:\s*$/) {
422 diagnostic('error', "Inheritance list split after ':', should be before", $fullline);
424 # Try to distinguish ternary operator (?:) correctly split after ":" vs
425 # constructor initialiser list incorrectly split after ":".
426 my $last_in_ternary = $in_ternary;
427 $in_ternary = / \?(?: |$)/;
428 if (!$last_in_ternary && !$in_ternary && /^\+.*\)\s*:\s*$/) {
429 diagnostic('error', "Constructor initialiser list split after ':', should be before", $fullline);
431 if (m,^\+\s+([-+/%^]|[&|]{2})\s,) {
432 diagnostic('error', "Expression split before operator '$1', should be after", $fullline);
434 if ($lang eq 'h') {
435 if (m!^\+\s*#\s*(ifndef|define|endif\s*/[*/])\s+((?:[A-Z]+_INCLUDED)?_?\w+_[Hh]\b)!) {
436 my ($type, $guard) = ($1, $2);
437 my $expected_guard;
438 if (!defined $header_guard_macro) {
439 if ($type eq 'ifndef') {
440 $header_guard_macro = [$type, $guard];
441 my $expected_guard = uc $fnm;
442 $expected_guard =~ s![-.]!_!g;
443 my $cut;
444 if (length($expected_guard) > length($guard) &&
445 substr($expected_guard, -length($guard) - 1, 1) eq '/' &&
446 substr($expected_guard, -length($guard)) eq $guard) {
447 $cut = -1;
448 } else {
449 for my $i (1 .. length($guard)) {
450 my $ch_e = substr($expected_guard, -$i, 1);
451 my $ch_g = substr($guard, -$i, 1);
452 next if ($ch_e eq $ch_g);
453 last if ($ch_e ne '/' || $ch_g ne '_');
454 $cut = $i;
457 if (!defined $cut) {
458 diagnostic('error', "include guard macro should match filename", $fullline);
460 my $prefix = 'XAPIAN_INCLUDED_';
461 if ($fnm =~ m!.*omega/(?:.*/)?!) {
462 $prefix = 'OMEGA_INCLUDED_';
464 #} elsif ($fnm =~ s!.*xapian-core/.*/!!) {
465 # $expected_guard = "XAPIAN_INCLUDED_" . $expected_guard;
466 #} elsif ($fnm =~ s!.*xapian-letor/.*/!!) {
467 #$expected_guard = "XAPIAN_INCLUDED_" . $expected_guard;
468 if (defined $cut && $cut == -1) {
469 diagnostic('error', "include guard macro should use prefix '$prefix'", $fullline);
470 } elsif (defined $cut && substr($guard, 0, length($guard) - $cut + 1) ne $prefix) {
471 diagnostic('error', "include guard macro should use prefix '$prefix'", $fullline);
472 } elsif ($guard !~ /^\Q$prefix\E/) {
473 diagnostic('error', "include guard macro should use prefix '$prefix'", $fullline);
476 } else {
477 if (!($type eq 'define' && $header_guard_macro->[0] ne 'ifndef')) {
478 my $expected_guard = $header_guard_macro->[1];
479 $header_guard_macro->[0] = $type;
480 if ($guard ne $expected_guard) {
481 diagnostic('error', "include guard macro should be $expected_guard", $fullline);
486 } else {
# A non-header file shouldn't define an include guard macro (a name such as
# XAPIAN_INCLUDED_FOO_H).  The prefix is matched with "[A-Z]+" - one or more
# capitals; writing the quantifier as "\+" would instead demand a literal
# '+' character, which can't occur in a macro name, so the check would never
# fire.
if (m!^\+\s*#\s*define\s+[A-Z]+_INCLUDED_!) {
    diagnostic('error', "include guard macro defined in non-header", $fullline);
}
491 } elsif (defined $lang && $lang eq 'py') {
492 if (/^\+.*;\s*$/) {
493 diagnostic('error', "';' at end of line of python code", $fullline);
# Look for common typos in added lines.  Files which legitimately contain
# these strings (this script itself, ChangeLog, NEWS and stemmer test
# vocabulary/output) are exempt.
unless ($fnm =~ m!xapian-check-patch|ChangeLog|NEWS|stemming/.*/(?:voc|output)\.txt$!) {
    my $typo_found =
        # Whole words, any case.
        /^\+.*?\b(xapain|the the|initialsing|ipv5|outputing)\b/i ||
        # Cases which just need to be the prefix of a word
        /^\+.*?\b((?:deafult|parm|peform|acessor|comptib|seach|seperat|seprat|separater|iteratat|calulat|delimitor|charactor)[a-z]*\b)/i ||
        # Case-sensitive cases
        /^\+.*?\b(and and)\b/;
    # $1 holds the capture from whichever pattern matched.
    diagnostic('error', "Typo '$1'", $fullline) if $typo_found;
}
# Update the line number and the added/removed totals according to the
# patch marker in the first column of the unmodified line.
my $first_char = substr($fullline, 0, 1);
if ($first_char eq '+') {
    # Added line: present in the new file, so it advances the line number.
    ++$lineno;
    ++$add_lines;
} elsif ($first_char eq ' ') {
    # Context line: also present in the new file.
    ++$lineno;
} elsif ($first_char eq '-') {
    # Removed line: not in the new file, so the line number stays put.
    ++$del_lines;
} elsif ($first_char eq '\\' && $last_first_char eq '+') {
    # "\ No newline at end of file" - if preceded by a "+" line, this means
    # that the patch leaves the file missing a newline at the end.
    diagnostic('error', 'No newline at end of file');
}
# Remember the marker so the next line can tell what preceded it.
$last_first_char = $first_char;
# Summarise on stderr: a count per diagnostic type (only if there were any
# diagnostics), followed by the overall patch statistics.
if (%count) {
    for my $type (sort keys %count) {
        print STDERR "$type count:\t$count{$type}\n";
    }
    print STDERR "\n";
}
print STDERR <<"__END__";
Files patched:\t$files
Lines added:\t$add_lines
Lines removed:\t$del_lines
__END__
# Exit status is 1 if any 'error' diagnostic was reported, otherwise 0.
my $status = exists $count{'error'} ? 1 : 0;
exit($status);