*(foo.get()) -> *foo
[xapian.git] / xapian-maintainer-tools / xapian-check-patch
blob1647a726e5f414e49ae356150406e66795b48055
1 #! /usr/bin/perl -w
2 # Copyright (c) 2007-2017 Olly Betts
4 # Permission is hereby granted, free of charge, to any person obtaining a copy
5 # of this software and associated documentation files (the "Software"), to
6 # deal in the Software without restriction, including without limitation the
7 # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 # sell copies of the Software, and to permit persons to whom the Software is
9 # furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice shall be included in
12 # all copies or substantial portions of the Software.
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 # IN THE SOFTWARE.
22 require 5.000;
23 use strict;
24 use POSIX;
# Handle --help: print a usage summary to stdout and exit successfully.
# Only the first command-line argument is inspected; anything else in @ARGV
# is left alone so that the main loop can read it as a patch file via <>.
if (defined $ARGV[0] && $ARGV[0] eq '--help') {
    print <<END;
Syntax: $0 [PATCH]...

Nit-pick Xapian patches.

A patch can be supplied on stdin, or one or more patch files listed on the
command line.

Produces output suitable for use with vim's quick-fix mode, and similar
features in other editors.

Example usage:

git diff master.. | xapian-check-patch > tmp.qf
vim -q tmp.qf
END
    exit 0;
}
# Name of the file the current hunk applies to, and the line number (in the
# patched version of that file) of the patch line currently being examined.
# Both are maintained by the main loop and read by diagnostic().
my ($fnm, $lineno);

# Number of diagnostics emitted so far, keyed by type ('error', 'warning',
# 'info').
my %count;

# Report a problem in "file:line: type: message" form on stdout (the format
# vim's quick-fix mode expects) and count it under its type.
#
#   $type     - severity label, e.g. 'error', 'warning' or 'info'
#   $msg      - description of the problem
#   $fullline - optional: the offending patch line, appended after ": ".
#               No newline is added after it (callers pass the raw patch
#               line); when it is omitted the message is terminated with a
#               newline instead.
sub diagnostic {
    my ($type, $msg, $fullline) = @_;
    print "$fnm:$lineno: $type: $msg";
    if (defined $fullline) {
        print ": $fullline";
    } else {
        print "\n";
    }
    ++$count{$type};
}
# Totals reported in the summary printed after the whole patch has been read.
my $add_lines = 0;
my $del_lines = 0;
my $files = 0;

# Per-file settings.  They are (re)computed whenever a file header line is
# seen, and given defaults here because a patch may contain hunks which
# aren't preceded by such a header:
# SVN property changes don't have an "Index: [...]" line.
#
# $want_tabs: 1 = indent with tabs, 0 = indent with spaces, -1 = don't know.
my $want_tabs = -1;
# Whether to check added lines for trailing whitespace / space before tab.
my $check_trailing = 0;
my $check_space_tab = 0;
# True while inside a C-style /* ... */ comment which spans patch lines.
my $in_comment = 0;
# Language of the current file ('c++', 'c', 'h', 'py', 'changelog'), or undef
# if the file type isn't one we have language-specific checks for.
my $lang;
# Once a header's include guard '#ifndef' has been seen: [last directive
# seen, guard macro name].
my $header_guard_macro;
# First character of the previous patch line (used to interpret a following
# "\ No newline at end of file" marker).
my $last_first_char = '';
# True if the previous patch line looked like it contained a ternary '?'.
my $in_ternary;
# Main loop: read the patch one line at a time (from the files named on the
# command line, or stdin) and run the checks appropriate to the file type on
# each line.  $_ is progressively simplified (comments and string literals
# are stripped) so the checks don't trip over their contents, while $fullline
# keeps the untouched line for use in diagnostics.
while (<>) {
    # Header for the next file in the patch: reset the per-file state and
    # work out which checks apply from the filename.
    if (/^Index: (.+)/ || m!^diff --git a/.+ b/(.+)!) {
        ++$files;
        $fnm = $1;
        $lineno = 1;
        $lang = undef;
        $in_comment = 0;
        $header_guard_macro = undef;
        $in_ternary = 0;
        # Don't know!
        $want_tabs = -1;
        if ($fnm =~ /\.cc$/) {
            if ($fnm !~ m!\b(?:cdb|portability/)! &&
                $fnm !~ m!\bcommon/getopt\.cc$! &&
                $fnm !~ m!\bomega/md5\.cc$! &&
                $fnm !~ m!\bcommon/msvc_dirent\.cc$!) {
                $lang = 'c++';
                $want_tabs = 1 unless ($fnm =~ m!\blanguages/steminternal\.cc$!);
            }
        } elsif ($fnm =~ /\.c$/) {
            if ($fnm !~ m!\blanguages/compiler/! &&
                $fnm !~ m!/lemon\.c$! &&
                $fnm !~ m!/xapdep\.c$!) {
                $lang = 'c';
                $want_tabs = 1;
            }
        } elsif ($fnm =~ /\.h$/) {
            if ($fnm !~ m!\binclude/xapian/intrusive_ptr\.h! &&
                $fnm !~ m!\blanguages/compiler/! &&
                $fnm !~ m!\bcommon/msvc_dirent\.h$!) {
                $lang = 'h';
                $want_tabs = 1 unless ($fnm =~ m!/omega/cdb!);
            }
        } elsif ($fnm =~ /\.py(?:\.in)?$/) {
            $lang = 'py';
            $want_tabs = 0;
        } elsif ($fnm =~ m!(?:^|/)ChangeLog\b!) {
            $lang = 'changelog';
            $want_tabs = 1;
        }
        $check_trailing =
            $fnm !~ /\.sbl$/ &&
            $fnm !~ m!\bcommon/msvc_dirent\.! &&
            $fnm !~ m!/lemon\.c$! &&
            $fnm !~ m!/queryparser\.lt$! &&
            $fnm !~ m!\bcdb! &&
            $fnm !~ m!/testdata/etext\.txt$!;
        $check_space_tab =
            $fnm !~ /\.sbl$/;
        # print STDERR "$fnm: lang=" . ($lang // "UNKNOWN") . "\n";
        next;
    }

    my $pre3 = substr($_, 0, 3);
    if ($pre3 eq '@@ ') {
        # Hunk header: pick up the starting line number in the new file.
        # The ",count" part of either range is optional in unified diffs
        # (it is omitted when the count is 1), e.g. "@@ -1 +1 @@".
        /^\@\@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)?\b/ and $lineno = $1;
        $in_comment = 0;
        next;
    }
    if ($pre3 eq '---' || $pre3 eq '+++') {
        # Old/new filename lines of the file header.
        next;
    }

    my $fullline = $_;
    if (defined $lang && ($lang eq 'c++' || $lang eq 'h' || $lang eq 'c')) {
        # Uncomment commented out parameter names: foo(int /*bar*/) -> foo(int bar)
        s!/\*([A-Za-z_][A-Za-z_0-9]*)\*/([,)])!$1$2!g;

        # Check for comments without a space before the comment text.
        if (m!^\+.*\s/[*/]{1,2}[A-Za-z0-9]!) {
            diagnostic('error', "Missing space between comment characters and comment text", $fullline);
        }

        # Trim comments:
        if (s!/(?:\*.*?\*/|/.*)!!g) {
            s/\s+$//;
        }
        # Trim a comment which starts on this line but doesn't end on it:
        if (s!/\*.*!!g) {
            s/\s+$//;
            $in_comment = 1;
        }
        # Trim content of comments ending on this line:
        if (s!^(.).*\*/!$1*/!) {
            $in_comment = 0;
        }
        if ($in_comment) {
            $_ = '';
        } else {
            # Drop comment content for "*" continuation lines (when /* isn't in hunk):
            s/^(.)(\s*\*).*/$1$2/;
        }
    } elsif (defined $lang && $lang eq 'py') {
        # Trim comments:
        if (s!#.*!!g) {
            s/\s+$//;
        }
    }

    # Replace multiple spaces before line continuation marker:
    s! +\\$! \\!;

    if (defined $lang && ($lang eq 'c++' || $lang eq 'h' || $lang eq 'c') && !(/^[-+]\s*\#/)) {
        if (m,^\+\s+LOGCALL(?:_[A-Z0-9]+)*\([^"]*"[^"]*(?<!operator)\(,) {
            diagnostic('error', "Don't include parentheses in debug logging method/class name", $fullline);
        }
        # Replace string literals containing escaped quotes:
        # the contents of each string or character literal are replaced by
        # a single "X" so that later checks don't look inside literals.
        if (/['"]/) {
            my $quote = substr($_, $-[0], 1);
            my $start = $+[0];
            my $i = $start;
            QUOTELOOP: while (1) {
                if ($i >= length($_)) {
                    # Literal isn't terminated on this line.
                    $_ = substr($_, 0, $start) . "X\n";
                    last;
                }
                my $c = substr($_, $i, 1);
                if ($c eq $quote) {
                    $_ = substr($_, 0, $start) . "X" . substr($_, $i);
                    $i = $start + 2;
                    # See if there's another string after this one:
                    while ($i != length($_)) {
                        $c = substr($_, $i, 1);
                        ++$i;
                        if ($c eq '"' || $c eq "'") {
                            $quote = $c;
                            $start = $i;
                            next QUOTELOOP;
                        }
                    }
                    last;
                }
                if ($c eq '\\') {
                    ++$i;
                    $c = substr($_, $i, 1);
                    if ($c eq 'x') {
                        ++$i while (substr($_, $i, 1) =~ /^[A-Fa-f0-9]$/);
                        next;
                    } elsif ($c =~ /^[0-7]/) {
                        my $j = $i;
                        ++$i while ($i - $j <= 3 && substr($_, $i, 1) =~ /^[0-7]$/);
                        next;
                    }
                    # Any other escaped character (including an escaped
                    # quote) is stepped over by the increment below.
                }
                ++$i;
            }
        }
    }

    if ($check_trailing && /^\+.*[ \t]$/) {
        diagnostic('error', "added/changed line has trailing whitespace", $fullline);
    }
    if ($check_space_tab && /^\+.* \t/) {
        diagnostic('error', "added/changed line has space before tab", $fullline);
    }
    if ($want_tabs == 1 and /^\+\t* {8}/) {
        diagnostic('error', "added/changed line uses spaces for indentation rather than tab", $fullline);
    }
    # Only fires when $want_tabs is 0: the "don't know" value -1 is true.
    if (!$want_tabs and /^\+ *\t/) {
        diagnostic('error', "added/changed line uses tab for indentation rather than spaces", $fullline);
    }
    if ((!defined $lang || $lang ne 'changelog') && $fullline =~ /^([-+]).*\bFIX(?:ME)\b/) {
        # Break up the string in the regexp above and messages below to avoid
        # this triggering on its own code!
        if ($1 eq '-') {
            # Not an error, but interesting information.
            diagnostic('info', "FIX"."ME removed", $fullline);
        } else {
            # Not an error, but not good.
            diagnostic('warning', "FIX"."ME added", $fullline);
        }
    }
    if (defined $lang && $lang ne 'changelog' && /^\+.*\\([abcefp]|brief|code|deprecated|endcode|exception|file|internal|li|param|private|return|todo)\b/) {
        diagnostic('error', "Doxygen command '\\$1' introduced by '\\' not '\@'", $fullline);
    }
    if (defined $lang && $lang ne 'changelog' && /^\+.*@\s+([abcefp]|brief|code|deprecated|endcode|exception|file|internal|li|param|private|return|todo)\b/) {
        diagnostic('error', "Broken Doxygen command: whitespace between '\@' and '$1'", $fullline);
    }
    if (defined $lang && ($lang eq 'c++' || $lang eq 'h' || $lang eq 'c')) {
        # NOTE(review): the three non-empty alternatives below are identical
        # as written, which suggests runs of spaces in this pattern have been
        # collapsed at some point - confirm the intended indent widths
        # against the upstream copy of this script.
        if ($check_space_tab && /^\+( (?:| | | ))[^ \t#].*(?:[^)];|[^);,])\n/) {
            # Exclude lines ending ');', ')', or ',' to avoid reporting for wrapped function arguments.
            diagnostic('error', "line indented by ".length($1)." spaces", $fullline);
        }
        if (m!^\+\s*(case|catch|class|do|for|if|namespace|struct|switch|try|union)\b([^ ]| \s)!) {
            diagnostic('error', "'$1' not followed by exactly one space", $fullline);
        }
        if (m!^\+.*;[^\s\\]!) {
            diagnostic('error', "Missing space after ';'", $fullline);
        }
        if (m!^\+.*[^(;]\s;!) {
            # Stuff like this is OK: for ( ; ; ) {
            # though for that exact case I'd suggest: while (true) {
            diagnostic('error', "Whitespace before ';'", $fullline);
        }
        if (m!^\+.*?\b(return)\b([^ ;]| \s)!) {
            diagnostic('error', "'$1' not followed by exactly one space", $fullline);
        }
        if (m!^\+.*?\b(else)\b([^ \n]| \s)!) {
            diagnostic('error', "'$1' not followed by exactly one space", $fullline);
        }
        if (m!^\+.*?\b(while)\b([^ ]| \s)!) {
            diagnostic('error', "'$1' not followed by exactly one space", $fullline);
        }
        if (m!^\+.*?(?:}|}\s{2,}|}\t|^[^}]*)\b(catch)\b!) {
            diagnostic('error', "'$1' not preceded by exactly '} '", $fullline);
        }
        if (m!^\+.*?(?:}|}\s{2,}|}\t)\b(else|while)\b!) {
            diagnostic('error', "'}' and '$1' not separated by exactly one space", $fullline);
        }
        if (m!^\+.*\((?: [^;]|\t)!) {
            # Allow: for ( ; i != 10; ++i)
            diagnostic('error', "Whitespace after '('", $fullline);
        }
        if (m!^\+.*\H.*\h\)!) {
            diagnostic('error', "Whitespace before ')'", $fullline);
        }
        if (m!^\+.*;\s*(\w+)([-+]{2})\)!) {
            diagnostic('error', "Prefer '$2$1' to '$1$2'", $fullline);
        }
        # Operator directly preceded by an identifier character or ')'.
        if (m,^\+\s*[^#].*[\w)](?!-[->]|\+\+)((?:\&\&|\|\||<<|>>|[-+/*%~=<>!&|^])=?|[?]),) {
            # Save the match offsets - the regexps below would clobber them.
            my @pre = @-;
            my @post = @+;
            my $op = $1;
            if (substr($_, $pre[1] - 8, 8) eq 'operator') {
                # operator*() etc
            } elsif ($op eq '>' && substr($_, 0, $pre[1]) =~ /[A-Za-z0-9_]</) {
                # y = static_cast<char>(x);
            } elsif ($op eq '>') {
                # Any other '>' is currently allowed too.
            } elsif ($op eq '<' && substr($_, $pre[1] - 1, 1) =~ /^[A-Za-z0-9_]$/ && substr($_, $post[1]) =~ />/) {
                # y = static_cast<char>(x);
            } elsif ($op eq '<' &&
                     substr($_, 0, $pre[1]) =~ /\b(?:list|map|multimap|multiset|priority_queue|set|template|unordered_map|unordered_set|vector)$/) {
                # y = priority_queue<Foo*,
                #                    Bar>;
                # template<typename A,
                #          typename B>
            } elsif ($op eq '&&' && substr($_, $pre[1] - 4, 4) eq 'auto') {
                # auto&& x
            } elsif (($op eq '<<' || $op eq '>>') &&
                     substr($_, 0, $pre[1]) =~ /\b(?:0x[0-9a-fA-F]+|[0-9]+)$/ &&
                     substr($_, $post[1]) =~ /^(?:0x[0-9a-fA-F]+|[0-9]+)\b/) {
                # 0x00b1<<26
            } elsif (($op eq '-' || $op eq '+') &&
                     substr($_, 0, $pre[1]) =~ /[0-9]\.?e$/) {
                # 1.2e-3, 7.e+3
            } elsif ($op eq '>>' &&
                     /[A-Za-z0-9_]<.+</) {
                # vector<vector<int>> v;
            } elsif ($op =~ /^[*&|]$/) {
                # FIXME: *: const char* x;
                # FIXME: &: const char& x;
                # FIXME: |: FOO|BAR
            } else {
                diagnostic('error', "Missing space before '$op'", $fullline);
            }
        }
        # Operator directly followed by an operand, or by more than one space.
        if (m@^\+\s*[^#\s].*?((?:\&\&|\|\||<<|>>|[-+/*%~=<>!&|^])=?|[?:,])(?<!(?:-[->]|\+\+|::))(?:[\w\(\.\{!"']| \s)@) {
            my @pre = @-;
            my @post = @+;
            my $op = $1;
            if ($op eq '~' && substr($_, $post[1]) =~ /^[A-Za-z][A-Za-z0-9_]*\(/) {
                # Destructor - e.g. ~Foo();
            } elsif (($op eq '-' || $op eq '+' || $op eq '!' || $op eq '~') &&
                     substr($_, 0, $pre[1]) =~ m@(?:[-+/*%~=<>&|,;?:] |[\[(]|\b(?:return|case) |^\+\s*)$@) {
                # Unary -, +, !, ~: e.g. foo = +1; bar = x * (-y); baz = a * -b;
            } elsif ($op eq ',' && (
                     /\b(?:AssertRel(?:Paranoid)?|TEST_REL)\(/ ||
                     /{[^()]*}/)) {
                # AssertRel(a,<,b);
            } elsif ($op eq '>>' &&
                     /[A-Za-z0-9_]<.+</) {
                # vector<vector<int>>&
            } elsif ($op =~ /^[*&<>|]$/) {
                # FIXME: *: const char *x;
                # FIXME: &: const char &x;
                # FIXME: < >: y = static_cast<char>(x);
                # FIXME: |: FOO|BAR
            } elsif (substr($_, $pre[1] - 8, 8) eq 'operator') {
                # operator==() etc
            } elsif (($op eq '<<' || $op eq '>>') &&
                     substr($_, 0, $pre[1]) =~ /\b(?:0x[0-9a-fA-F]+|[0-9]+)$/ &&
                     substr($_, $post[1]) =~ /^(?:0x[0-9a-fA-F]+|[0-9]+)\b/) {
                # 0x00b1<<26
            } elsif (($op eq '-' || $op eq '+') &&
                     substr($_, 0, $pre[1]) =~ /[0-9]\.?e$/) {
                # 1.2e-3, 7.e+3
            } else {
                diagnostic('error', "Should have exactly one space after '$op'", $fullline);
            }
        }
        if (/^\+.*;;\s*$/) {
            diagnostic('error', "Extra ';' at end of line", $fullline);
        }
        if (m@^\+[^#]*?[^#\h] +(,|->)@) {
            diagnostic('error', "Space before '$1'", $fullline);
        }
        # Two or more consecutive spaces after the first non-blank character
        # (so indentation isn't reported here).
        if (m,^\+[^#]*?[^#\h] {2},) {
            diagnostic('error', "Multiple spaces", $fullline);
        }
        if (m!^\+(?:.*[;{])?\s*/[/*]{1,2}\w!) {
            diagnostic('error', "added/changed line has comment without whitespace before the text", $fullline);
        }
        if (m!^\+.*?\)\{!) {
            diagnostic('error', "No space between ')' and '{'", $fullline);
        }
        if ($fnm !~ m!/(?:md5|posixy_wrapper|perftest)\.cc$! &&
            m,^\+.*[^\w\.>]([a-z][a-z0-9]*[A-Z]\w*),) {
            my $symbol = $1;
            if ($symbol eq 'gzFile' || $symbol eq 'uInt' || $symbol =~ /^(?:de|in)flate[A-Z]/) {
                # Whitelist symbols from APIs we use.
            } elsif ($symbol =~ /^[gs]et[A-Z]$/) {
                # For now, allow setD(), etc.
            } elsif ($symbol =~ /^h(?:File|Read|Write|Pipe|Client)$/ || $symbol eq 'fdwCtrlType' || $symbol eq 'pShutdownSocket') {
                # Platform specific names, allow for now.
            } else {
                diagnostic('error', "camelCase identifier '$symbol' - Xapian coding convention is to use lower case and underscores for variables and functions, and CamelCase for class names", $fullline);
            }
        }
        # The first line of a source file should be "/** @file NAME" where
        # NAME is the filename, optionally with leading directories.  This has
        # to look at $fullline since comments have been stripped from $_ by
        # now (and $_ is empty if the line opens a multi-line comment).
        if ($lineno == 1 && $fullline =~ m!^\+!) {
            if ($fullline =~ m!^\+/\*\* \@file (\S+)!) {
                my $at_file = $1;
                if ($fnm eq $at_file || $fnm =~ m!/\Q$at_file\E\z!) {
                    # @file matches
                } else {
                    diagnostic('error', "\@file doesn't match filename", $fullline);
                }
            } else {
                diagnostic('error', "\@file missing", $fullline);
            }
        }
        if (/^\+.*\b(?:class|struct)\b.*:\s*$/) {
            diagnostic('error', "Inheritance list split after ':', should be before", $fullline);
        }
        # Try to distinguish ternary operator (?:) correctly split after ":" vs
        # constructor initialiser list incorrectly split after ":".
        my $last_in_ternary = $in_ternary;
        $in_ternary = / \?(?: |$)/;
        if (!$last_in_ternary && !$in_ternary && /^\+.*\)\s*:\s*$/) {
            diagnostic('error', "Constructor initialiser list split after ':', should be before", $fullline);
        }
        if (m,^\+\s+([-+/%^]|[&|]{2})\s,) {
            diagnostic('error', "Expression split before operator '$1', should be after", $fullline);
        }
        if ($lang eq 'h') {
            # Include guard checks.
            if (m!^\+\s*#\s*(ifndef|define|endif\s*/[*/])\s+((?:[A-Z]+_INCLUDED)?_?\w+_[Hh]\b)!) {
                my ($type, $guard) = ($1, $2);
                if (!defined $header_guard_macro) {
                    if ($type eq 'ifndef') {
                        $header_guard_macro = [$type, $guard];
                        my $expected_guard = uc $fnm;
                        $expected_guard =~ s![-.]!_!g;
                        # $cut is left undefined if the guard doesn't
                        # correspond to the filename, set to -1 if the whole
                        # guard is the tail of the path (so it has no
                        # prefix), and otherwise to the distance from the end
                        # of the guard of the furthest '_' which lines up
                        # with a '/' in the path.
                        my $cut;
                        if (length($expected_guard) > length($guard) &&
                            substr($expected_guard, -length($guard) - 1, 1) eq '/' &&
                            substr($expected_guard, -length($guard)) eq $guard) {
                            $cut = -1;
                        } else {
                            for my $i (1 .. length($guard)) {
                                my $ch_e = substr($expected_guard, -$i, 1);
                                my $ch_g = substr($guard, -$i, 1);
                                next if ($ch_e eq $ch_g);
                                last if ($ch_e ne '/' || $ch_g ne '_');
                                $cut = $i;
                            }
                        }
                        if (!defined $cut) {
                            diagnostic('error', "include guard macro should match filename", $fullline);
                        }
                        my $prefix = 'XAPIAN_INCLUDED_';
                        if ($fnm =~ m!.*omega/(?:.*/)?!) {
                            $prefix = 'OMEGA_INCLUDED_';
                        }
                        #} elsif ($fnm =~ s!.*xapian-core/.*/!!) {
                        #    $expected_guard = "XAPIAN_INCLUDED_" . $expected_guard;
                        #} elsif ($fnm =~ s!.*xapian-letor/.*/!!) {
                        #    $expected_guard = "XAPIAN_INCLUDED_" . $expected_guard;
                        if (defined $cut && $cut == -1) {
                            diagnostic('error', "include guard macro should use prefix '$prefix'", $fullline);
                        } elsif (defined $cut && substr($guard, 0, length($guard) - $cut + 1) ne $prefix) {
                            diagnostic('error', "include guard macro should use prefix '$prefix'", $fullline);
                        } elsif ($guard !~ /^\Q$prefix\E/) {
                            diagnostic('error', "include guard macro should use prefix '$prefix'", $fullline);
                        }
                    }
                } else {
                    # The guard '#ifndef' has been seen: later directives
                    # should name the same macro.  A '#define' is only taken
                    # to be part of the guard if it directly follows the
                    # '#ifndef'.
                    if (!($type eq 'define' && $header_guard_macro->[0] ne 'ifndef')) {
                        my $expected_guard = $header_guard_macro->[1];
                        $header_guard_macro->[0] = $type;
                        if ($guard ne $expected_guard) {
                            diagnostic('error', "include guard macro should be $expected_guard", $fullline);
                        }
                    }
                }
            }
        } else {
            if (m!^\+\s*#\s*define\s+[A-Z]+_INCLUDED_!) {
                diagnostic('error', "include guard macro defined in non-header", $fullline);
            }
        }
    } elsif (defined $lang && $lang eq 'py') {
        if (/^\+.*;\s*$/) {
            diagnostic('error', "';' at end of line of python code", $fullline);
        }
    }
    # Common typos.  Files which legitimately contain these strings (this
    # script itself among them) are skipped.
    if ($fnm !~ m!xapian-check-patch|ChangeLog|NEWS|stemming/.*/(?:voc|output)\.txt$!) {
        if (/^\+.*?\b(xapain|the the|initialsing|ipv5|outputing)\b/i ||
            # Cases which just need to be the prefix of a word
            /^\+.*?\b((?:deafult|parm|peform|acessor|comptib|seach|seperat|seprat|separater|iteratat|calulat|delimitor|charactor)[a-z]*\b)/i ||
            # Case-sensitive cases
            /^\+.*?\b(and and)\b/) {
            diagnostic('error', "Typo '$1'", $fullline);
        }
    }

    # Keep the line number and the added/removed totals up to date.
    my $first_char = substr($fullline, 0, 1);
    if ($first_char eq ' ') {
        ++$lineno;
    } elsif ($first_char eq '+') {
        ++$lineno;
        ++$add_lines;
    } elsif ($first_char eq '-') {
        ++$del_lines;
    } elsif ($first_char eq '\\') {
        # "\ No newline at end of file" - if preceded by a "+" line, this means
        # that the patch leaves the file missing a newline at the end.
        if ($last_first_char eq '+') {
            diagnostic('error', 'No newline at end of file');
        }
    }
    $last_first_char = $first_char;
}
# Report totals on stderr, so they stay out of the quick-fix output on
# stdout: first the number of diagnostics of each type (only if there were
# any), then the overall patch statistics.
if (scalar keys %count) {
    for (sort keys %count) {
        print STDERR "$_ count:\t$count{$_}\n";
    }
    print STDERR "\n";
}
print STDERR <<"__END__";
Files patched:\t$files
Lines added:\t$add_lines
Lines removed:\t$del_lines
__END__
# Exit status is 1 if any 'error' diagnostics were issued; warnings and info
# messages alone don't cause a failure.
exit(exists $count{'error'} ? 1 : 0);