cvsimport
[findutils.git] / build-aux / src-sniff.py
blob09f02e80af6d189dee1edc91c9a07918717cc801
#! /usr/bin/env python
#
# src-sniff.py: checks source code for patterns that look like common errors.
# Copyright (C) 2007 Free Software Foundation, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 import re
21 import sys
# Suffixes of C-like source and header files.  Raw strings keep the
# backslash escapes intact for the regex engine instead of relying on
# Python passing an unknown string escape through unchanged.
C_ISH_FILENAME = r"\.(c|cc|h|cpp|cxx|hxx)$"
C_ISH_FILENAME_RE = re.compile(C_ISH_FILENAME)
# Suffixes of C-like implementation files only (no headers).
C_MODULE_FILENAME_RE = re.compile(r"\.(c|cc|cpp|cxx)$")
# Header that CheckFirstInclude() requires to be included first.
FIRST_INCLUDE = 'config.h'
# Number of problems reported so far; incremented by Problem() and used
# by main() to choose the exit status.
problems = 0
def Problem(**kwargs):
    """Report one problem on stderr and count it.

    Keyword args:
      filename: name of the file the problem was found in.
      line: line number of the problem; a false value (None or 0) means
        the problem is reported against the file as a whole.
      message: %-style format string; it is expanded with all of the
        keyword arguments, so it may use e.g. %(matchtext)s.
      Any other keywords are only used to expand the message.

    Side effects:
      Increments the module-level 'problems' counter and writes a line
      of the form "error: LOCATION: DETAIL" to sys.stderr.
    """
    global problems
    problems += 1
    msg = kwargs['message']
    if kwargs['line']:
        location = "%(filename)s:%(line)d" % kwargs
    else:
        location = "%(filename)s" % kwargs
    detail = msg % kwargs
    # sys.stderr.write() behaves the same on Python 2 and Python 3; the
    # "print >>stream" statement form only works on Python 2.
    sys.stderr.write("error: %s: %s\n" % (location, detail))
class RegexSniffer(object):
    """Reports a problem when a regular expression matches some text."""

    def __init__(self, source, message, regexflags=0):
        """Compile the pattern and remember the message to report.

        Args:
          source: regular expression source text.
          message: %-style message handed to Problem() on a match.
          regexflags: flags passed through to re.compile().
        """
        super(RegexSniffer, self).__init__()
        self._regex = re.compile(source, regexflags)
        self._msg = message

    @staticmethod
    def _LineNumberAt(text, offset):
        """Return the 1-based number of the line containing text[offset].

        Newlines are counted from the very start of the text, so a
        newline in the first character position is included.
        """
        return 1 + text.count('\n', 0, offset)

    def Sniff(self, text, filename, line):
        """Search text for the pattern; report the first match, if any.

        Args:
          text: the text to search (a single line or a whole file).
          filename: file name used in the report.
          line: line number to report, or None to derive it from the
            position of the match within text.
        """
        m = self._regex.search(text)
        if not m:
            return
        if line is None:
            line = self._LineNumberAt(m.string, m.start(0))
        args = {
            'filename' : filename,
            'line'     : line,
            'fulltext' : text,
            'matchtext': m.group(0),
            'message'  : self._msg
            }
        Problem(**args)
class RegexChecker(object):
    """Applies a set of RegexSniffers to files whose name matches a regex."""

    def __init__(self, regex, line_smells, file_smells):
        """Set up the sniffers for one kind of file.

        Args:
          regex: pattern searched for in the file name to decide whether
            this checker applies; it is passed to re.compile().
          line_smells: sequence of [pattern, message] pairs that are
            matched against each line separately.
          file_smells: sequence of [pattern, message] pairs that are
            matched against the whole file text, compiled with
            re.S | re.M.
        """
        # object.__init__() accepts no arguments besides the instance, so
        # nothing extra may be passed along here.
        super(RegexChecker, self).__init__()
        self._regex = re.compile(regex)
        self._line_sniffers = [RegexSniffer(s[0], s[1]) for s in line_smells]
        self._file_sniffers = [RegexSniffer(s[0], s[1], re.S | re.M)
                               for s in file_smells]

    def Check(self, filename, lines, fulltext):
        """Run every sniffer over the file if its name is recognised.

        Args:
          filename: name of the file being checked.
          lines: sequence of (line_number, line_text) pairs.
          fulltext: the complete text of the file.
        """
        if not self._regex.search(filename):
            # We don't know how to check this file.  Skip it.
            return
        # We recognise this type of file.
        for line_number, line_text in lines:
            for sniffer in self._line_sniffers:
                sniffer.Sniff(line_text, filename, line_number)
        for sniffer in self._file_sniffers:
            sniffer.Sniff(fulltext, filename, None)
# The checks to run.  Each RegexChecker takes a file-name pattern, a list
# of per-line smells and a list of whole-file smells; every smell is a
# [pattern, message] pair.  Several patterns and messages are split into
# adjacent string literals or use a one-character class (e.g.
# 'file[s]ystem'); the regex is unchanged, but the literal text being
# searched for never appears verbatim in this file.
checkers = [
    # Check C-like languages for C code smells.
    RegexChecker(C_ISH_FILENAME_RE,
    # line smells
    [
    [r'(?<!\w)free \(\(', "don't cast the argument to free()"],
    [r'\*\) *x(m|c|re)alloc(?!\w)',"don't cast the result of x*alloc"],
    [r'\*\) *alloca(?!\w)',"don't cast the result of alloca"],
    # The tab is written as an escape so that it stays visible and
    # cannot be lost when whitespace is reformatted.
    [r'[ ]\t',"found SPACE-TAB; remove the space"],
    [r'(?<!\w)([fs]?scanf|ato([filq]|ll))(?!\w)',
     'do not use *scan''f, ato''f, ato''i, ato''l, ato''ll, ato''q, or ss''canf'],
    [r'error \(EXIT_SUCCESS',"passing EXIT_SUCCESS to error is confusing"],
    [r'file[s]ystem', "prefer writing 'file system' to 'filesystem'"],
    [r'HAVE''_CONFIG_H', "Avoid checking HAVE_CONFIG_H"],
    # [r'HAVE_FCNTL_H', "Avoid checking HAVE_FCNTL_H"],
    [r'O_NDELAY', "Avoid using O_NDELAY"],
    [r'the *the', "'the the' is probably not deliberate"],
    [r'(?<!\w)error \([^_"]*[^_]"[^"]*[a-z]{3}', "untranslated error message"],
    [r'^# *if\s+defined *\(', "useless parentheses in '#if defined'"],
    ],
    # file smells
    [
    [r'# *include <assert.h>(?!.*assert \()',
     "If you include <assert.h>, use assert()."],
    [r'# *include "quotearg.h"(?!.*(?<!\w)quotearg(_[^ ]+)? \()',
     "If you include \"quotearg.h\", use one of its functions."],
    [r'# *include "quote.h"(?!.*(?<!\w)quote(_[^ ]+)? \()',
     "If you include \"quote.h\", use one of its functions."],
    ]),
    # Check Makefiles for Makefile code smells.
    RegexChecker(r'(^|/)[Mm]akefile(\.am|\.in)?',
                 [ [r'^ ', "Spaces at start of line"], ],
                 []),
    # Check everything for whitespace problems.
    # RegexChecker('', [], [[r'\s$', "trailing whitespace"],]),
    # Check everything for out of date addresses.
    RegexChecker('', [], [
    [r'675\s*Mass\s*Ave,\s*02139[^a-zA-Z]*USA',
     "out of date FSF address"],
    [r'59 Temple Place.*02111-?1307\s*USA',
     "out of date FSF address"],
    ]),
    # Check everything for GPL version regression
    RegexChecker('',
                 [],
                 [[r'G(nu |eneral )?P(ublic )?L(icense)?.{1,200}version [12]',
                   "Out of date GPL version: %(matchtext)s"],
                  ]),
    # Bourne shell code smells
    RegexChecker(r'\.sh$',
                 [
                   [r'for\s*\w+\s*in.*;\s*do',
                    # Solaris 10 /bin/sh rejects this, see Autoconf manual
                    "for loops should not contain a 'do' on the same line."],
                 ], []),
    ]
# missing check: ChangeLog prefixes
# missing: sc_always_defined_macros from coreutils
# missing: sc_tight_scope
def Warning(filename, desc):
    """Write a warning about a file to stderr.

    Unlike Problem(), this does not touch the problem counter.

    Args:
      filename: name of the file the warning is about.
      desc: text of the warning.
    """
    # sys.stderr.write() behaves the same on Python 2 and Python 3; the
    # "print >>stream" statement form only works on Python 2.
    sys.stderr.write("warning: %s: %s\n" % (filename, desc))
def BuildIncludeList(text):
    """Build a list of included files, with line numbers.

    Args:
      text: the full text of the source file
    Returns:
      [ ('config.h',32), ('assert.h',33), ... ]
    """
    # The header name is matched non-greedily so that it stops at the
    # first closing delimiter, even when a trailing comment on the same
    # line contains another '>' or '"'.
    include_re = re.compile(r'# *include +[<"](.*?)[>"]')
    includes = []
    # Start counting at offset 0 so that a newline in the very first
    # character of the file is not missed.
    last_include_pos = 0
    line = 1
    for m in include_re.finditer(text):
        header = m.group(1)
        # Count only the number of lines between the last include and
        # this one.  Counting them from the beginning would be quadratic.
        line += text.count('\n', last_include_pos, m.start(0))
        last_include_pos = m.end()
        includes.append((header, line))
    return includes
def CheckStatHeader(filename, lines, fulltext):
    """Report a use of struct stat that precedes #include <sys/stat.h>.

    The lines are scanned in order and the scan ends at the first line
    that either uses struct stat (which is reported) or includes
    <sys/stat.h> (which makes any later use acceptable).

    Args:
      filename: name of the file being checked.
      lines: sequence of (line_number, line_text) pairs.
      fulltext: the complete text of the file (not used here).
    """
    header_pattern = re.compile(r'# *include .*<sys/stat.h>')
    # It's OK to have a pointer though.
    usage_pattern = re.compile(r'struct stat\W *[^*]')
    for line_number, line_text in lines:
        if usage_pattern.search(line_text):
            Problem(filename=filename,
                    line=line_number,
                    message="If you use struct stat, you must #include <sys/stat.h> first")
            # Diagnose only once
            return
        if header_pattern.search(line_text):
            return
def CheckFirstInclude(filename, lines, fulltext):
    """Check that FIRST_INCLUDE is included, and included first.

    A problem is reported when FIRST_INCLUDE is included but some other
    header comes before it.  A warning is issued when FIRST_INCLUDE is
    not included at all.

    Args:
      filename: name of the file being checked.
      lines: sequence of (line_number, line_text) pairs (not used here).
      fulltext: the complete text of the file.
    """
    includes = BuildIncludeList(fulltext)
    included_headers = [entry[0] for entry in includes]
    has_required = FIRST_INCLUDE in included_headers

    if includes:
        first_header, first_line = includes[0]
        if first_header and first_header != FIRST_INCLUDE and has_required:
            msg = ("%(actual_first_include)s is the first included file, "
                   "but %(required_first_include)s should be included first")
            Problem(filename=filename, line=first_line, message=msg,
                    actual_first_include=first_header,
                    required_first_include=FIRST_INCLUDE)

    if not has_required:
        Warning(filename,
                "%s should be included by most files" % FIRST_INCLUDE)
def SniffSourceFile(filename, lines, fulltext):
    """Run every applicable check over one source file.

    Files whose name matches C_MODULE_FILENAME_RE get the include-order
    and struct stat checks first; after that each entry in 'checkers' is
    given the file and decides for itself whether it applies.

    Args:
      filename: name of the file being checked.
      lines: sequence of (line_number, line_text) pairs.
      fulltext: the complete text of the file.
    """
    is_c_module = C_MODULE_FILENAME_RE.search(filename)
    if is_c_module:
        for header_check in (CheckFirstInclude, CheckStatHeader):
            header_check(filename, lines, fulltext)
    for regex_checker in checkers:
        regex_checker.Check(filename, lines, fulltext)
def main(args):
    """Check every file named on the command line.

    Args:
      args: the argument vector; args[0] is the program name and each
        remaining element is the name of a file to check.
    Returns:
      1 if any problem has been reported, otherwise 0.
    """
    for srcfile in args[1:]:
        f = open(srcfile)
        # Close the file even if reading it fails; the checks only need
        # the text, so the handle is released before they run.
        try:
            text_lines = f.readlines()
        finally:
            f.close()
        lines = [(index + 1, text) for index, text in enumerate(text_lines)]
        fulltext = ''.join(text_lines)
        SniffSourceFile(srcfile, lines, fulltext)
    if problems:
        return 1
    return 0
# When run as a script, check the files named on the command line and
# exit with the status that main() returns.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)