3 """The Tab Nanny despises ambiguous indentation. She knows no mercy.
5 tabnanny -- Detection of ambiguous indentation
7 For the time being this module is intended to be called as a script.
8 However it is possible to import it into an IDE and use the function
9 check() described below.
11 Warning: The API provided by this module is likely to change in future
12 releases; such changes may not be backward compatible.
15 # Released to the public domain, by Tim Peters, 15 April 1998.
17 # XXX Note: this is now a standard library module.
18 # XXX The API needs to undergo changes however; the current code is too
19 # XXX script-like. This will be addressed later.
# process_tokens() classifies tokenize.NL as a junk token, so a tokenize
# module that predates NL cannot be used; fail loudly at import time.
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

# Names exported by "from <this module> import *".
__all__ = ["check", "NannyNag", "process_tokens"]
38 sys
.stderr
.write(sep
+ str(arg
))
40 sys
.stderr
.write("\n")
43 global verbose
, filename_only
45 opts
, args
= getopt
.getopt(sys
.argv
[1:], "qv")
46 except getopt
.error
, msg
:
51 filename_only
= filename_only
+ 1
55 errprint("Usage:", sys
.argv
[0], "[-v] file_or_directory ...")
class NannyNag(Exception):
    """
    Raised by process_tokens() if detecting an ambiguous indent.
    Captured and handled in check().

    Attributes (also available through the get_*() accessors):
        lineno -- line number supplied by the raiser
        msg    -- description of the problem, e.g. "indent not equal ..."
        line   -- the source line supplied by the raiser
    """
    def __init__(self, lineno, msg, line):
        self.lineno, self.msg, self.line = lineno, msg, line

    def get_lineno(self):
        """Return the line number the nag refers to."""
        return self.lineno

    def get_msg(self):
        """Return the descriptive message of the nag."""
        return self.msg

    def get_line(self):
        """Return the offending source line."""
        return self.line
77 If file_or_dir is a directory and not a symbolic link, then recursively
78 descend the directory tree named by file_or_dir, checking all .py files
79 along the way. If file_or_dir is an ordinary Python source file, it is
80 checked for whitespace related problems. The diagnostic messages are
81 written to standard output using the print statement.
84 if os
.path
.isdir(file) and not os
.path
.islink(file):
86 print "%r: listing directory" % (file,)
87 names
= os
.listdir(file)
89 fullname
= os
.path
.join(file, name
)
90 if (os
.path
.isdir(fullname
) and
91 not os
.path
.islink(fullname
) or
92 os
.path
.normcase(name
[-3:]) == ".py"):
99 errprint("%r: I/O Error: %s" % (file, msg
))
103 print "checking %r ..." % file
106 process_tokens(tokenize
.generate_tokens(f
.readline
))
108 except tokenize
.TokenError
, msg
:
109 errprint("%r: Token Error: %s" % (file, msg
))
112 except IndentationError, msg
:
113 errprint("%r: Indentation Error: %s" % (file, msg
))
116 except NannyNag
, nag
:
117 badline
= nag
.get_lineno()
118 line
= nag
.get_line()
120 print "%r: *** Line %d: trouble in tab city! ***" % (file, badline
)
121 print "offending line: %r" % (line
,)
124 if ' ' in file: file = '"' + file + '"'
125 if filename_only
: print file
126 else: print file, badline
, repr(line
)
130 print "%r: Clean bill of health." % (file,)
133 # the characters used for space and tab
138 # the original string
140 # the number of leading whitespace characters in raw
142 # the number of tabs in raw[:n]
144 # the normal form as a pair (count, trailing), where:
146 # a tuple such that raw[:n] contains count[i]
147 # instances of S * i + T
149 # the number of trailing spaces in raw[:n]
150 # It's A Theorem that m.indent_level(t) ==
151 # n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
153 # true iff raw[:n] is of the form (T*)(S*)
155 def __init__(self
, ws
):
157 S
, T
= Whitespace
.S
, Whitespace
.T
168 count
= count
+ [0] * (b
- len(count
) + 1)
169 count
[b
] = count
[b
] + 1
175 self
.norm
= tuple(count
), b
176 self
.is_simple
= len(count
) <= 1
# return length of longest contiguous run of spaces (whether or not
# preceding a tab)
def longest_run_of_spaces(self):
    """Return the length of the longest contiguous run of spaces.

    self.norm is the pair (count, trailing).  count[i] tallies the runs
    of exactly i spaces that are followed by a tab, so its highest index
    is the longest such run; trailing is the run of spaces after the
    last tab.
    """
    runs_before_tab, spaces_after_last_tab = self.norm
    longest_before_tab = len(runs_before_tab) - 1
    return max(longest_before_tab, spaces_after_last_tab)
def indent_level(self, tabsize):
    """Return the column reached by this whitespace for a given tab size.

    tabsize -- number of columns between tab stops (an integer >= 1)

    Uses self.norm == (count, trailing) and self.nt (the number of tabs).
    """
    # The straightforward computation adds, for every run of i spaces
    # followed by a tab, the (i//ts + 1)*ts columns that run occupies:
    #
    #   il = trailing + sum (i//ts + 1)*ts*count[i]
    #      = trailing + ts * sum (i//ts + 1)*count[i]
    #      = trailing + ts * sum (i//ts*count[i] + count[i])
    #      = trailing + ts * [(sum i//ts*count[i]) + (sum count[i])]
    #      = trailing + ts * [(sum i//ts*count[i]) + num_tabs]
    #
    # and i//ts*count[i] is 0 when i < ts, so the loop can start at ts.
    count, trailing = self.norm
    il = 0
    for i in range(tabsize, len(count)):
        # Floor division is required: a run of i spaces skips i//ts
        # whole tab stops.  "//" keeps this exact under true division.
        il = il + i // tabsize * count[i]
    return trailing + tabsize * (il + self.nt)
# return true iff self.indent_level(t) == other.indent_level(t)
# for all t >= 1
def equal(self, other):
    """Return true iff both objects have the same normal form.

    Comparing the norm pairs is sufficient because two whitespace
    strings have equal indent levels at every tab size exactly when
    their normal forms are equal.
    """
    mine, theirs = self.norm, other.norm
    return mine == theirs
210 # return a list of tuples (ts, i1, i2) such that
211 # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
212 # Intended to be used after not self.equal(other) is known, in which
213 # case it will return at least one witnessing tab size.
def not_equal_witness(self, other):
    """Return the tab sizes at which self and other indent differently.

    The result is a list of tuples (ts, i1, i2) such that
    i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    Only tab sizes from 1 up to one more than the longest run of spaces
    in either object are examined.
    """
    n = max(self.longest_run_of_spaces(),
            other.longest_run_of_spaces()) + 1
    a = []
    for ts in range(1, n+1):
        # Compute each level once and reuse it for the test and the tuple.
        i1 = self.indent_level(ts)
        i2 = other.indent_level(ts)
        if i1 != i2:
            a.append((ts, i1, i2))
    return a
# Return True iff self.indent_level(t) < other.indent_level(t)
# for all t >= 1.
227 # The algorithm is due to Vincent Broman.
228 # Easy to prove it's correct.
230 # Trivial to prove n is sharp (consider T vs ST).
231 # Unknown whether there's a faster general way. I suspected so at
232 # first, but no longer.
233 # For the special (but common!) case where M and N are both of the
234 # form (T*)(S*), M.less(N) iff M.len() < N.len() and
235 # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
237 # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
def less(self, other):
    """Return True iff self indents strictly less than other at every tab size.

    That is, self.indent_level(t) < other.indent_level(t) for all t >= 1.
    """
    # At tab size 1 the indent level is just the character count, so
    # this comparison settles the ts == 1 case.
    if self.n >= other.n:
        return False
    # Both of the form (T*)(S*): given self.n < other.n, comparing the
    # tab counts is enough.
    if self.is_simple and other.is_simple:
        return self.nt <= other.nt
    n = max(self.longest_run_of_spaces(),
            other.longest_run_of_spaces()) + 1
    # the self.n >= other.n test already did it for ts=1
    for ts in range(2, n+1):
        if self.indent_level(ts) >= other.indent_level(ts):
            return False
    return True
251 # return a list of tuples (ts, i1, i2) such that
252 # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
253 # Intended to be used after not self.less(other) is known, in which
254 # case it will return at least one witnessing tab size.
def not_less_witness(self, other):
    """Return the tab sizes at which self does not indent less than other.

    The result is a list of tuples (ts, i1, i2) such that
    i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    Only tab sizes from 1 up to one more than the longest run of spaces
    in either object are examined.
    """
    n = max(self.longest_run_of_spaces(),
            other.longest_run_of_spaces()) + 1
    a = []
    for ts in range(1, n+1):
        # Compute each level once and reuse it for the test and the tuple.
        i1 = self.indent_level(ts)
        i2 = other.indent_level(ts)
        if i1 >= i2:
            a.append((ts, i1, i2))
    return a
def format_witnesses(w):
    """Format a list of witness tuples for use in a diagnostic message.

    w -- a list of tuples whose first item is a tab size

    Returns "at tab size N" for a single witness and
    "at tab sizes N1, N2, ..." for several.
    """
    firsts = [str(tup[0]) for tup in w]
    prefix = "at tab size"
    # Pluralise only when more than one tab size is listed.
    if len(w) > 1:
        prefix = prefix + "s"
    return prefix + " " + ', '.join(firsts)
273 def process_tokens(tokens
):
274 INDENT
= tokenize
.INDENT
275 DEDENT
= tokenize
.DEDENT
276 NEWLINE
= tokenize
.NEWLINE
277 JUNK
= tokenize
.COMMENT
, tokenize
.NL
278 indents
= [Whitespace("")]
281 for (type, token
, start
, end
, line
) in tokens
:
283 # a program statement, or ENDMARKER, will eventually follow,
284 # after some (possibly empty) run of tokens of the form
285 # (NL | COMMENT)* (INDENT | DEDENT+)?
286 # If an INDENT appears, setting check_equal is wrong, and will
287 # be undone when we see the INDENT.
292 thisguy
= Whitespace(token
)
293 if not indents
[-1].less(thisguy
):
294 witness
= indents
[-1].not_less_witness(thisguy
)
295 msg
= "indent not greater e.g. " + format_witnesses(witness
)
296 raise NannyNag(start
[0], msg
, line
)
297 indents
.append(thisguy
)
300 # there's nothing we need to check here! what's important is
301 # that when the run of DEDENTs ends, the indentation of the
302 # program statement (or ENDMARKER) that triggered the run is
303 # equal to what's left at the top of the indents stack
305 # Ouch! This assert triggers if the last line of the source
306 # is indented *and* lacks a newline -- then DEDENTs pop out
308 # assert check_equal # else no earlier NEWLINE, or an earlier INDENT
313 elif check_equal
and type not in JUNK
:
314 # this is the first "real token" following a NEWLINE, so it
315 # must be the first token of the next program statement, or an
316 # ENDMARKER; the "line" argument exposes the leading whitespace
317 # for this statement; in the case of ENDMARKER, line is an empty
318 # string, so will properly match the empty string with which the
319 # "indents" stack was seeded
321 thisguy
= Whitespace(line
)
322 if not indents
[-1].equal(thisguy
):
323 witness
= indents
[-1].not_equal_witness(thisguy
)
324 msg
= "indent not equal e.g. " + format_witnesses(witness
)
325 raise NannyNag(start
[0], msg
, line
)
328 if __name__
== '__main__':