3 """The Tab Nanny despises ambiguous indentation. She knows no mercy.
5 tabnanny -- Detection of ambiguous indentation
7 For the time being this module is intended to be called as a script.
8 However it is possible to import it into an IDE and use the function
9 check() described below.
11 Warning: The API provided by this module is likely to change in future
12 releases; such changes may not be backward compatible.
15 # Released to the public domain, by Tim Peters, 15 April 1998.
17 # XXX Note: this is now a standard library module.
18 # XXX The API needs to undergo changes however; the current code is too
19 # XXX script-like. This will be addressed later.
27 if not hasattr(tokenize
, 'NL'):
28 raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
30 __all__
= ["check", "NannyNag", "process_tokens"]
38 sys
.stderr
.write(sep
+ str(arg
))
40 sys
.stderr
.write("\n")
43 global verbose
, filename_only
45 opts
, args
= getopt
.getopt(sys
.argv
[1:], "qv")
46 except getopt
.error
, msg
:
51 filename_only
= filename_only
+ 1
55 errprint("Usage:", sys
.argv
[0], "[-v] file_or_directory ...")
60 class NannyNag(Exception):
62 Raised by tokeneater() if detecting an ambiguous indent.
63 Captured and handled in check().
65 def __init__(self
, lineno
, msg
, line
):
66 self
.lineno
, self
.msg
, self
.line
= lineno
, msg
, line
77 If file_or_dir is a directory and not a symbolic link, then recursively
78 descend the directory tree named by file_or_dir, checking all .py files
79 along the way. If file_or_dir is an ordinary Python source file, it is
80 checked for whitespace related problems. The diagnostic messages are
81 written to standard output using the print statement.
84 if os
.path
.isdir(file) and not os
.path
.islink(file):
86 print "%r: listing directory" % (file,)
87 names
= os
.listdir(file)
89 fullname
= os
.path
.join(file, name
)
90 if (os
.path
.isdir(fullname
) and
91 not os
.path
.islink(fullname
) or
92 os
.path
.normcase(name
[-3:]) == ".py"):
99 errprint("%r: I/O Error: %s" % (file, msg
))
103 print "checking %r ..." % file
106 process_tokens(tokenize
.generate_tokens(f
.readline
))
108 except tokenize
.TokenError
, msg
:
109 errprint("%r: Token Error: %s" % (file, msg
))
112 except NannyNag
, nag
:
113 badline
= nag
.get_lineno()
114 line
= nag
.get_line()
116 print "%r: *** Line %d: trouble in tab city! ***" % (file, badline
)
117 print "offending line: %r" % (line
,)
120 if ' ' in file: file = '"' + file + '"'
121 if filename_only
: print file
122 else: print file, badline
, repr(line
)
126 print "%r: Clean bill of health." % (file,)
129 # the characters used for space and tab
134 # the original string
136 # the number of leading whitespace characters in raw
138 # the number of tabs in raw[:n]
140 # the normal form as a pair (count, trailing), where:
142 # a tuple such that raw[:n] contains count[i]
143 # instances of S * i + T
145 # the number of trailing spaces in raw[:n]
146 # It's A Theorem that m.indent_level(t) ==
147 # n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
149 # true iff raw[:n] is of the form (T*)(S*)
151 def __init__(self
, ws
):
153 S
, T
= Whitespace
.S
, Whitespace
.T
164 count
= count
+ [0] * (b
- len(count
) + 1)
165 count
[b
] = count
[b
] + 1
171 self
.norm
= tuple(count
), b
172 self
.is_simple
= len(count
) <= 1
174 # return length of longest contiguous run of spaces (whether or not
176 def longest_run_of_spaces(self
):
177 count
, trailing
= self
.norm
178 return max(len(count
)-1, trailing
)
180 def indent_level(self
, tabsize
):
181 # count, il = self.norm
182 # for i in range(len(count)):
184 # il = il + (i/tabsize + 1)*tabsize * count[i]
188 # il = trailing + sum (i/ts + 1)*ts*count[i] =
189 # trailing + ts * sum (i/ts + 1)*count[i] =
190 # trailing + ts * sum i/ts*count[i] + count[i] =
191 # trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
192 # trailing + ts * [(sum i/ts*count[i]) + num_tabs]
193 # and note that i/ts*count[i] is 0 when i < ts
195 count
, trailing
= self
.norm
197 for i
in range(tabsize
, len(count
)):
198 il
= il
+ i
/tabsize
* count
[i
]
199 return trailing
+ tabsize
* (il
+ self
.nt
)
201 # return true iff self.indent_level(t) == other.indent_level(t)
203 def equal(self
, other
):
204 return self
.norm
== other
.norm
206 # return a list of tuples (ts, i1, i2) such that
207 # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
208 # Intended to be used after not self.equal(other) is known, in which
209 # case it will return at least one witnessing tab size.
210 def not_equal_witness(self
, other
):
211 n
= max(self
.longest_run_of_spaces(),
212 other
.longest_run_of_spaces()) + 1
214 for ts
in range(1, n
+1):
215 if self
.indent_level(ts
) != other
.indent_level(ts
):
217 self
.indent_level(ts
),
218 other
.indent_level(ts
)) )
221 # Return True iff self.indent_level(t) < other.indent_level(t)
223 # The algorithm is due to Vincent Broman.
224 # Easy to prove it's correct.
226 # Trivial to prove n is sharp (consider T vs ST).
227 # Unknown whether there's a faster general way. I suspected so at
228 # first, but no longer.
229 # For the special (but common!) case where M and N are both of the
230 # form (T*)(S*), M.less(N) iff M.len() < N.len() and
231 # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
233 # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
234 def less(self
, other
):
235 if self
.n
>= other
.n
:
237 if self
.is_simple
and other
.is_simple
:
238 return self
.nt
<= other
.nt
239 n
= max(self
.longest_run_of_spaces(),
240 other
.longest_run_of_spaces()) + 1
241 # the self.n >= other.n test already did it for ts=1
242 for ts
in range(2, n
+1):
243 if self
.indent_level(ts
) >= other
.indent_level(ts
):
247 # return a list of tuples (ts, i1, i2) such that
248 # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
249 # Intended to be used after not self.less(other) is known, in which
250 # case it will return at least one witnessing tab size.
251 def not_less_witness(self
, other
):
252 n
= max(self
.longest_run_of_spaces(),
253 other
.longest_run_of_spaces()) + 1
255 for ts
in range(1, n
+1):
256 if self
.indent_level(ts
) >= other
.indent_level(ts
):
258 self
.indent_level(ts
),
259 other
.indent_level(ts
)) )
262 def format_witnesses(w
):
263 firsts
= map(lambda tup
: str(tup
[0]), w
)
264 prefix
= "at tab size"
266 prefix
= prefix
+ "s"
267 return prefix
+ " " + ', '.join(firsts
)
269 def process_tokens(tokens
):
270 INDENT
= tokenize
.INDENT
271 DEDENT
= tokenize
.DEDENT
272 NEWLINE
= tokenize
.NEWLINE
273 JUNK
= tokenize
.COMMENT
, tokenize
.NL
274 indents
= [Whitespace("")]
277 for (type, token
, start
, end
, line
) in tokens
:
279 # a program statement, or ENDMARKER, will eventually follow,
280 # after some (possibly empty) run of tokens of the form
281 # (NL | COMMENT)* (INDENT | DEDENT+)?
282 # If an INDENT appears, setting check_equal is wrong, and will
283 # be undone when we see the INDENT.
288 thisguy
= Whitespace(token
)
289 if not indents
[-1].less(thisguy
):
290 witness
= indents
[-1].not_less_witness(thisguy
)
291 msg
= "indent not greater e.g. " + format_witnesses(witness
)
292 raise NannyNag(start
[0], msg
, line
)
293 indents
.append(thisguy
)
296 # there's nothing we need to check here! what's important is
297 # that when the run of DEDENTs ends, the indentation of the
298 # program statement (or ENDMARKER) that triggered the run is
299 # equal to what's left at the top of the indents stack
301 # Ouch! This assert triggers if the last line of the source
302 # is indented *and* lacks a newline -- then DEDENTs pop out
304 # assert check_equal # else no earlier NEWLINE, or an earlier INDENT
309 elif check_equal
and type not in JUNK
:
310 # this is the first "real token" following a NEWLINE, so it
311 # must be the first token of the next program statement, or an
312 # ENDMARKER; the "line" argument exposes the leading whitespace
313 # for this statement; in the case of ENDMARKER, line is an empty
314 # string, so will properly match the empty string with which the
315 # "indents" stack was seeded
317 thisguy
= Whitespace(line
)
318 if not indents
[-1].equal(thisguy
):
319 witness
= indents
[-1].not_equal_witness(thisguy
)
320 msg
= "indent not equal e.g. " + format_witnesses(witness
)
321 raise NannyNag(start
[0], msg
, line
)
324 if __name__
== '__main__':