Fix typo. Backport if anyone cares. :-)
Lib/tabnanny.py
#! /usr/bin/env python

"""The Tab Nanny despises ambiguous indentation. She knows no mercy.

tabnanny -- Detection of ambiguous indentation

For the time being this module is intended to be called as a script.
However it is possible to import it into an IDE and use the function
check() described below.

Warning: The API provided by this module is likely to change in future
releases; such changes may not be backward compatible.
"""

# Released to the public domain, by Tim Peters, 15 April 1998.

# XXX Note: this is now a standard library module.
# XXX The API needs to undergo changes however; the current code is too
# XXX script-like. This will be addressed later.

__version__ = "6"

import os
import sys
import getopt
import tokenize
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

__all__ = ["check", "NannyNag", "process_tokens"]

verbose = 0
filename_only = 0

def errprint(*args):
    sep = ""
    for arg in args:
        sys.stderr.write(sep + str(arg))
        sep = " "
    sys.stderr.write("\n")

def main():
    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error, msg:
        errprint(msg)
        return
    for o, a in opts:
        if o == '-q':
            filename_only = filename_only + 1
        if o == '-v':
            verbose = verbose + 1
    if not args:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)

class NannyNag(Exception):
    """
    Raised by tokeneater() if detecting an ambiguous indent.
    Captured and handled in check().
    """
    def __init__(self, lineno, msg, line):
        self.lineno, self.msg, self.line = lineno, msg, line
    def get_lineno(self):
        return self.lineno
    def get_msg(self):
        return self.msg
    def get_line(self):
        return self.line

def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using the print statement.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print "%r: listing directory" % (file,)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if (os.path.isdir(fullname) and
                not os.path.islink(fullname) or
                os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = open(file)
    except IOError, msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    if verbose > 1:
        print "checking %r ..." % file

    try:
        process_tokens(tokenize.generate_tokens(f.readline))

    except tokenize.TokenError, msg:
        errprint("%r: Token Error: %s" % (file, msg))
        return

    except NannyNag, nag:
        badline = nag.get_lineno()
        line = nag.get_line()
        if verbose:
            print "%r: *** Line %d: trouble in tab city! ***" % (file, badline)
            print "offending line: %r" % (line,)
            print nag.get_msg()
        else:
            if ' ' in file: file = '"' + file + '"'
            if filename_only: print file
            else: print file, badline, repr(line)
        return

    if verbose:
        print "%r: Clean bill of health." % (file,)

class Whitespace:
    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #     the original string
    #   n
    #     the number of leading whitespace characters in raw
    #   nt
    #     the number of tabs in raw[:n]
    #   norm
    #     the normal form as a pair (count, trailing), where:
    #     count
    #         a tuple such that raw[:n] contains count[i]
    #         instances of S * i + T
    #     trailing
    #         the number of trailing spaces in raw[:n]
    #     It's A Theorem that m.indent_level(t) ==
    #     n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #     true iff raw[:n] is of the form (T*)(S*)
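    #
    # A worked illustration (added here, not part of the original comment;
    # values computed by hand from the rules above): for
    # raw == "\t  \t  " (tab, two spaces, tab, two spaces),
    #   count == (1, 0, 1)   -- one tab after 0 spaces, one after 2 spaces
    #   norm  == ((1, 0, 1), 2), n == 6, nt == 2, is_simple == False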

    def __init__(self, ws):
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n = n + 1
                b = b + 1
            elif ch == T:
                n = n + 1
                nt = nt + 1
                if b >= len(count):
                    count = count + [0] * (b - len(count) + 1)
                count[b] = count[b] + 1
                b = 0
            else:
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        count, trailing = self.norm
        return max(len(count)-1, trailing)

    def indent_level(self, tabsize):
        # count, il = self.norm
        # for i in range(len(count)):
        #     if count[i]:
        #         il = il + (i/tabsize + 1)*tabsize * count[i]
        # return il

        # quicker:
        # il = trailing + sum (i/ts + 1)*ts*count[i] =
        #      trailing + ts * sum (i/ts + 1)*count[i] =
        #      trailing + ts * sum i/ts*count[i] + count[i] =
        #      trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
        #      trailing + ts * [(sum i/ts*count[i]) + num_tabs]
        # and note that i/ts*count[i] is 0 when i < ts
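        #
        # Worked check of the formula (added; computed by hand, with i/ts
        # meaning Python 2 integer division): for norm == ((1, 0, 1), 2)
        # and nt == 2 (i.e. raw == "\t  \t  "),
        #   tabsize 4:  2 + 4*(0 + 2)        == 10
        #   tabsize 2:  2 + 2*((2/2)*1 + 2)  == 8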

        count, trailing = self.norm
        il = 0
        for i in range(tabsize, len(count)):
            il = il + i/tabsize * count[i]
        return trailing + tabsize * (il + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            if self.indent_level(ts) != other.indent_level(ts):
                a.append( (ts,
                           self.indent_level(ts),
                           other.indent_level(ts)) )
        return a

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way. I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n+1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return False
        return True
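
    # Small illustrations (added here, not in the original; worked out by
    # hand from the definitions above):
    #   Whitespace("\t").less(Whitespace("\t ")) is True: a tab followed by
    #   a space indents one column further at every tab size.
    #   Whitespace("\t").less(Whitespace(" \t")) is False: at any tab size
    #   >= 2 both strings reach the same column.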

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            if self.indent_level(ts) >= other.indent_level(ts):
                a.append( (ts,
                           self.indent_level(ts),
                           other.indent_level(ts)) )
        return a

def format_witnesses(w):
    firsts = map(lambda tup: str(tup[0]), w)
    prefix = "at tab size"
    if len(w) > 1:
        prefix = prefix + "s"
    return prefix + " " + ', '.join(firsts)
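
# Illustrative witness run (added, not original; values computed by hand):
# for a tab-indented line vs. a 4-space-indented line,
#   Whitespace("\t").not_equal_witness(Whitespace("    "))
# returns [(1, 1, 4), (2, 2, 4), (3, 3, 4), (5, 5, 4)], which
# format_witnesses() renders as "at tab sizes 1, 2, 3, 5".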

def process_tokens(tokens):
    INDENT = tokenize.INDENT
    DEDENT = tokenize.DEDENT
    NEWLINE = tokenize.NEWLINE
    JUNK = tokenize.COMMENT, tokenize.NL
    indents = [Whitespace("")]
    check_equal = 0

    for (type, token, start, end, line) in tokens:
        if type == NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and will
            # be undone when we see the INDENT.
            check_equal = 1

        elif type == INDENT:
            check_equal = 0
            thisguy = Whitespace(token)
            if not indents[-1].less(thisguy):
                witness = indents[-1].not_less_witness(thisguy)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(thisguy)

        elif type == DEDENT:
            # there's nothing we need to check here! what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # Ouch! This assert triggers if the last line of the source
            # is indented *and* lacks a newline -- then DEDENTs pop out
            # of thin air.
            # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
            check_equal = 1

            del indents[-1]

        elif check_equal and type not in JUNK:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # "indents" stack was seeded
            check_equal = 0
            thisguy = Whitespace(line)
            if not indents[-1].equal(thisguy):
                witness = indents[-1].not_equal_witness(thisguy)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
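
# A minimal trigger, for illustration only (an assumed snippet, not taken
# from the original file; the witness list was worked out by hand): a block
# whose first line is indented with a tab and whose second line is indented
# with eight spaces makes the "check_equal" branch above raise NannyNag.
#
#   >>> import StringIO, tokenize
#   >>> src = 'if 1:\n\tx = 1\n        y = 2\n'
#   >>> rl = StringIO.StringIO(src).readline
#   >>> try:
#   ...     process_tokens(tokenize.generate_tokens(rl))
#   ... except NannyNag, nag:
#   ...     print nag.get_msg()
#   indent not equal e.g. at tab sizes 1, 2, 3, 4, 5, 6, 7, 9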

if __name__ == '__main__':
    main()