s3: don't replace the error message if already defined
[Samba/id10ts.git] / lib / pep8 / pep8.py
blob227a9a3ac43c2353081acec849c9e9bd8d1c764b
1 #!/usr/bin/python
2 # pep8.py - Check Python source code formatting, according to PEP 8
3 # Copyright (C) 2006 Johann C. Rocholl <johann@rocholl.net>
5 # Permission is hereby granted, free of charge, to any person
6 # obtaining a copy of this software and associated documentation files
7 # (the "Software"), to deal in the Software without restriction,
8 # including without limitation the rights to use, copy, modify, merge,
9 # publish, distribute, sublicense, and/or sell copies of the Software,
10 # and to permit persons to whom the Software is furnished to do so,
11 # subject to the following conditions:
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 # SOFTWARE.
25 """
26 Check Python source code formatting, according to PEP 8:
27 http://www.python.org/dev/peps/pep-0008/
29 For usage and a list of options, try this:
30 $ python pep8.py -h
32 This program and its regression test suite live here:
33 http://github.com/jcrocholl/pep8
35 Groups of errors and warnings:
36 E errors
37 W warnings
38 100 indentation
39 200 whitespace
40 300 blank lines
41 400 imports
42 500 line length
43 600 deprecation
44 700 statements
46 You can add checks to this program by writing plugins. Each plugin is
47 a simple function that is called for each line of source code, either
48 physical or logical.
50 Physical line:
51 - Raw line of text from the input file.
53 Logical line:
54 - Multi-line statements converted to a single line.
55 - Stripped left and right.
56 - Contents of strings replaced with 'xxx' of same length.
57 - Comments removed.
59 The check function requests physical or logical lines by the name of
60 the first argument:
62 def maximum_line_length(physical_line)
63 def extraneous_whitespace(logical_line)
64 def blank_lines(logical_line, blank_lines, indent_level, line_number)
66 The last example above demonstrates how check plugins can request
67 additional information with extra arguments. All attributes of the
68 Checker object are available. Some examples:
70 lines: a list of the raw lines from the input file
71 tokens: the tokens that contribute to this logical line
72 line_number: line number in the input file
73 blank_lines: blank lines before this one
74 indent_char: first indentation character in this file (' ' or '\t')
75 indent_level: indentation (with tabs expanded to multiples of 8)
76 previous_indent_level: indentation on previous line
77 previous_logical: previous logical line
79 The docstring of each check function shall be the relevant part of
80 text from PEP 8. It is printed if the user enables --show-pep8.
81 Several docstrings contain examples directly from the PEP 8 document.
83 Okay: spam(ham[1], {eggs: 2})
84 E201: spam( ham[1], {eggs: 2})
86 These examples are verified automatically when pep8.py is run with the
87 --doctest option. You can add examples for your own check functions.
88 The format is simple: "Okay" or error/warning code followed by colon
89 and space, the rest of the line is example source code. If you put 'r'
90 before the docstring, you can use \n for newline, \t for tab and \s
91 for space.
93 """
95 __version__ = '0.5.1dev'
97 import os
98 import sys
99 import re
100 import time
101 import inspect
102 import keyword
103 import tokenize
104 from optparse import OptionParser
105 from fnmatch import fnmatch
106 try:
107 frozenset
108 except NameError:
109 from sets import ImmutableSet as frozenset
# Comma-separated patterns of directory/file names that are skipped.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git'
# Error-code prefixes that are ignored unless the user says otherwise.
DEFAULT_IGNORE = 'E24'
MAX_LINE_LENGTH = 79

# All patterns are raw strings so that regex escapes such as \w and \s are
# never interpreted (or warned about) as string-literal escapes.
INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')
ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
# Group 2 must be a tab or TWO spaces: the checks built on this pattern
# report "multiple spaces" / "tab" around an operator, so a single space
# must not match.
WHITESPACE_AROUND_OPERATOR_REGEX = \
    re.compile(r'([^\w\s]*)\s*(\t|  )\s*([^\w\s]*)')
# Opening bracket followed by a space, or a space followed by a closing
# bracket / comma / semicolon / colon.  Brackets inside the character sets
# are escaped to avoid the "possible nested set" warning.
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[\[({] | [\]}),;:]')
WHITESPACE_AROUND_NAMED_PARAMETER_REGEX = \
    re.compile(r'[()]|\s=[^=]|[^=!<>]=\s')

WHITESPACE = ' \t'

BINARY_OPERATORS = frozenset(['**=', '*=', '+=', '-=', '!=', '<>',
    '%=', '^=', '&=', '|=', '==', '/=', '//=', '<=', '>=', '<<=', '>>=',
    '%', '^', '&', '|', '=', '/', '//', '<', '>', '<<'])
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS
# Token types that never contribute text to a logical line.
SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.INDENT,
                         tokenize.DEDENT, tokenize.NEWLINE])
# Keywords after which a unary operator needs no preceding-space check.
E225NOT_KEYWORDS = (frozenset(keyword.kwlist + ['print']) -
                    frozenset(['False', 'None', 'True']))
BENCHMARK_KEYS = ('directories', 'files', 'logical lines', 'physical lines')

# Module-level state; both start as None here and are read by the checks.
options = None
args = None
145 ##############################################################################
146 # Plugins (check functions) for physical lines
147 ##############################################################################
def tabs_or_spaces(physical_line, indent_char):
    r"""
    Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only. The
    second-most popular way is with tabs only. Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively. When
    invoking the Python command line interpreter with the -t option, it issues
    warnings about code that illegally mixes tabs and spaces. When using -tt
    these warnings become errors. These options are highly recommended!

    Okay: if a == 0:\n a = 1\n b = 1
    E101: if a == 0:\n a = 1\n\tb = 1
    """
    # Walk the leading blanks/tabs and report the first character that
    # differs from the indent character passed in.
    leading = INDENT_REGEX.match(physical_line).group(1)
    position = 0
    for blank in leading:
        if blank != indent_char:
            return position, "E101 indentation contains mixed spaces and tabs"
        position += 1
def tabs_obsolete(physical_line):
    r"""
    For new projects, spaces-only are strongly recommended over tabs. Most
    editors have features that make this easy to do.

    Okay: if True:\n return
    W191: if True:\n\treturn
    """
    # Only the leading whitespace is inspected; the offset reported is the
    # position of the first tab inside it.
    leading = INDENT_REGEX.match(physical_line).group(1)
    first_tab = leading.find('\t')
    if first_tab != -1:
        return first_tab, "W191 indentation contains tabs"
def trailing_whitespace(physical_line):
    r"""
    JCR: Trailing whitespace is superfluous.
    FBM: Except when it occurs as part of a blank line (i.e. the line is
    nothing but whitespace). According to Python docs[1] a line with only
    whitespace is considered a blank line, and is to be ignored. However,
    matching a blank line to its indentation level avoids mistakenly
    terminating a multi-line statement (e.g. class declaration) when
    pasting code into the standard Python interpreter.

    [1] http://docs.python.org/reference/lexical_analysis.html#blank-lines

    The warning returned varies on whether the line itself is blank, for easier
    filtering for those who want to indent their blank lines.

    Okay: spam(1)
    W291: spam(1)\s
    W293: class Foo(object):\n \n bang = 12
    """
    # Drop the line terminators in the same fixed order: newline (chr 10),
    # carriage return (chr 13), then form feed (chr 12, ^L).
    content = physical_line
    for terminator in ('\n', '\r', '\x0c'):
        content = content.rstrip(terminator)
    bare = content.rstrip()
    if bare == content:
        return None
    if not bare:
        # Nothing but whitespace on the line.
        return 0, "W293 blank line contains whitespace"
    return len(bare), "W291 trailing whitespace"
def trailing_blank_lines(physical_line, lines, line_number):
    r"""
    JCR: Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n
    """
    # Only the last physical line of the file can trigger this warning.
    is_last_line = line_number == len(lines)
    if is_last_line and not physical_line.strip():
        return 0, "W391 blank line at end of file"
def missing_newline(physical_line):
    """
    JCR: The last line should have a newline.
    """
    # A line that does not end in whitespace (including the empty string)
    # has no terminating newline.
    if physical_line[-1:].isspace():
        return None
    return len(physical_line), "W292 no newline at end of file"
def maximum_line_length(physical_line):
    """
    Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side. The default wrapping on such devices looks
    ugly. Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.
    """
    stripped = physical_line.rstrip()
    width = len(stripped)
    if width <= MAX_LINE_LENGTH:
        return None
    # The line could contain multi-byte characters: recount it as UTF-8.
    try:
        raw = stripped
        if not hasattr(raw, 'decode'):  # Python 3
            raw = raw.encode('latin-1')
        width = len(raw.decode('utf-8'))
    except UnicodeError:
        # UnicodeError covers both a failed latin-1 encode (text outside
        # Latin-1) and a failed UTF-8 decode; keep the raw length then.
        pass
    if width > MAX_LINE_LENGTH:
        return MAX_LINE_LENGTH, "E501 line too long (%d characters)" % width
257 ##############################################################################
258 # Plugins (check functions) for logical lines
259 ##############################################################################
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                previous_logical, previous_indent_level,
                blank_lines_before_comment):
    r"""
    Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Extra blank lines may be used (sparingly) to separate groups of related
    functions. Blank lines may be omitted between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical sections.

    Okay: def a():\n pass\n\n\ndef b():\n pass
    Okay: def a():\n pass\n\n\n# Foo\n# Bar\n\ndef b():\n pass

    E301: class Foo:\n b = 0\n def bar():\n pass
    E302: def a():\n pass\n\ndef b(n):\n pass
    E303: def a():\n pass\n\n\n\ndef b(n):\n pass
    E303: def a():\n\n\n\n pass
    E304: @decorator\n\ndef a():\n pass
    """
    if line_number == 1:
        return None  # Don't expect blank lines before the first line
    # Blank lines directly above and blank lines above a preceding comment
    # block both count; the larger figure is used.
    gap = max(blank_lines, blank_lines_before_comment)
    if previous_logical.startswith('@'):
        if gap:
            return 0, "E304 blank lines found after function decorator"
        return None
    if gap > 2 or (indent_level and gap == 2):
        return 0, "E303 too many blank lines (%d)" % gap
    starts_definition = False
    for prefix in ('def ', 'class ', '@'):
        if logical_line.startswith(prefix):
            starts_definition = True
            break
    if not starts_definition:
        return None
    if not indent_level:
        # Top-level definition: exactly two blank lines are required.
        if gap != 2:
            return 0, "E302 expected 2 blank lines, found %d" % gap
        return None
    # Nested definition: no blank line is needed when it opens a deeper
    # block or directly follows a docstring.
    if gap or previous_indent_level < indent_level:
        return None
    if DOCSTRING_REGEX.match(previous_logical):
        return None
    return 0, "E301 expected 1 blank line, found 0"
def extraneous_whitespace(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately inside parentheses, brackets or braces.

    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    for hit in EXTRANEOUS_WHITESPACE_REGEX.finditer(logical_line):
        pair = hit.group()
        symbol = pair.strip()
        column = hit.start()
        if pair == symbol + ' ' and symbol in '([{':
            # Space right after an opening bracket.
            return column + 1, "E201 whitespace after '%s'" % symbol
        # Space before a closer/separator is tolerated right after a comma.
        if pair != ' ' + symbol or logical_line[column - 1] == ',':
            continue
        if symbol in '}])':
            return column, "E202 whitespace before '%s'" % symbol
        if symbol in ',;:':
            return column, "E203 whitespace before '%s'" % symbol
def missing_whitespace(logical_line):
    """
    JCR: Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    """
    # Pair every character with its successor; the last character has no
    # successor and is therefore never examined.
    neighbours = zip(logical_line, logical_line[1:])
    for position, (symbol, follower) in enumerate(neighbours):
        if symbol not in ',;:' or follower in WHITESPACE:
            continue
        if symbol == ':':
            head = logical_line[:position]
            if head.count('[') > head.count(']'):
                continue  # Slice syntax, no space required
        if symbol == ',' and follower == ')':
            continue  # Allow tuple with only one element: (3,)
        return position, "E231 missing whitespace after '%s'" % symbol
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""
    Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n a = 1
    E111: a = 1

    Okay: for item in items:\n pass
    E112: for item in items:\npass

    Okay: a = 1\nb = 2
    E113: a = 1\n b = 2
    """
    # The multiple-of-four rule only applies to space-indented files.
    if indent_char == ' ' and indent_level % 4 != 0:
        return 0, "E111 indentation is not a multiple of four"
    opens_block = previous_logical.endswith(':')
    deeper = indent_level > previous_indent_level
    if opens_block:
        if not deeper:
            return 0, "E112 expected an indented block"
    elif deeper:
        return 0, "E113 unexpected indentation"
def whitespace_before_parameters(logical_line, tokens):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately before the open parenthesis that starts the argument
    list of a function call.

    - Immediately before the open parenthesis that starts an indexing or
    slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    previous = tokens[0]
    for index in range(1, len(tokens)):
        token_type, text, start, end, line = tokens[index]
        prev_type, prev_text, prev_end = previous[0], previous[1], previous[3]
        opener = token_type == tokenize.OP and text in '(['
        if opener and start != prev_end:
            # There is a gap in front of "(" or "[": it is an error only
            # after a name or a closing bracket ...
            follows_value = prev_type == tokenize.NAME or prev_text in '}])'
            # ... syntax "class A (B):" is allowed, but avoid it ...
            after_class = index >= 2 and tokens[index - 2][1] == 'class'
            # ... and "return (a.foo for a in range(5))" is allowed too.
            after_keyword = keyword.iskeyword(prev_text)
            if follows_value and not after_class and not after_keyword:
                return prev_end, "E211 whitespace before '%s'" % text
        previous = tokens[index]
def whitespace_around_operator(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
    align it with another.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    # The E221/E222 examples above carry the doubled space that the
    # "multiple spaces" messages describe, so each example is distinct.
    for match in WHITESPACE_AROUND_OPERATOR_REGEX.finditer(logical_line):
        before, whitespace, after = match.groups()
        is_tab = whitespace == '\t'
        # Errors are reported at the start of the offending whitespace.
        offset = match.start(2)
        if before in OPERATORS:
            if is_tab:
                return offset, "E224 tab after operator"
            return offset, "E222 multiple spaces after operator"
        elif after in OPERATORS:
            if is_tab:
                return offset, "E223 tab before operator"
            return offset, "E221 multiple spaces before operator"
def missing_whitespace_around_operator(logical_line, tokens):
    r"""
    - Always surround these binary operators with a single space on
    either side: assignment (=), augmented assignment (+=, -= etc.),
    comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
    Booleans (and, or, not).

    - Use spaces around arithmetic operators.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: baz(**kwargs)
    Okay: negative = -1
    Okay: spam(-1)
    Okay: alpha[:-i]
    Okay: if not -5 < x < +5:\n pass
    Okay: lambda *args, **kw: (args, kw)

    E225: i=i+1
    E225: submitted +=1
    E225: x = x*2 - 1
    E225: hypot2 = x*x + y*y
    E225: c = (a+b) * (a-b)
    E225: c = alpha -4
    E225: z = x **y
    """
    # parens: incremented for "(" and for "lambda", decremented for ")";
    #     while it is non-zero a bare "=" is accepted without spaces.
    # need_space: set once an operator that requires surrounding whitespace
    #     has been seen; it is checked against the token that follows.
    parens = 0
    need_space = False
    # The previous token starts out as an operator with no end position,
    # so the very first token can never be reported.
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
            # ERRORTOKEN is triggered by backticks in Python 3000
            continue
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            # The previous token was an operator that needs a space after it.
            if start != prev_end:
                need_space = False
            elif text == '>' and prev_text == '<':
                # Tolerate the "<>" operator, even if running Python 3
                pass
            else:
                return prev_end, "E225 missing whitespace around operator"
        elif token_type == tokenize.OP and prev_end is not None:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in BINARY_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if prev_type == tokenize.OP:
                    # After a closing bracket the operator is binary.
                    if prev_text in '}])':
                        need_space = True
                elif prev_type == tokenize.NAME:
                    # After a name that is not one of the listed keywords
                    # the operator is binary.
                    if prev_text not in E225NOT_KEYWORDS:
                        need_space = True
                else:
                    need_space = True
            # The operator touches the previous token: space missing before.
            if need_space and start == prev_end:
                return prev_end, "E225 missing whitespace around operator"
        prev_type = token_type
        prev_text = text
        prev_end = end
def whitespace_around_comma(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
    align it with another.

    JCR: This should also be applied around comma etc.
    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    line = logical_line
    for separator in ',;:':
        # Two spaces are searched for: a single space after a separator is
        # the correct form and must not be reported as "multiple spaces".
        found = line.find(separator + '  ')
        if found > -1:
            return found + 1, "E241 multiple spaces after '%s'" % separator
        found = line.find(separator + '\t')
        if found > -1:
            return found + 1, "E242 tab after '%s'" % separator
def whitespace_around_named_parameter_equals(logical_line):
    """
    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    """
    # The pattern yields either a single parenthesis or a three-character
    # fragment holding an '=' with whitespace next to it.
    depth = 0
    for hit in WHITESPACE_AROUND_NAMED_PARAMETER_REGEX.finditer(
            logical_line):
        fragment = hit.group()
        if fragment == '(':
            depth += 1
        elif fragment == ')':
            depth -= 1
        elif depth and len(fragment) == 3:
            return hit.start(), "E251 no spaces around keyword / parameter equals"
def whitespace_before_inline_comment(logical_line, tokens):
    """
    Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement. Inline
    comments should be separated by at least two spaces from the statement.
    They should start with a # and a single space.

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    """
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if token_type == tokenize.COMMENT:
            # A comment with nothing but whitespace before it is not inline.
            if not line[:start[1]].strip():
                continue
            if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
                return (prev_end,
                        "E261 at least two spaces before inline comment")
            # Exactly one space after '#': reject both a missing space and
            # more than one.  A properly formed '# text' comment passes.
            if text.startswith('#  ') or not text.startswith('# '):
                return start, "E262 inline comment should start with '# '"
        else:
            prev_end = end
def imports_on_separate_lines(logical_line):
    r"""
    Imports should usually be on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    # Only plain "import a, b" statements are checked; "from x import a, b"
    # is allowed.
    if not logical_line.startswith('import '):
        return None
    comma = logical_line.find(',')
    if comma >= 0:
        return comma, "E401 multiple imports on one line"
def compound_statements(logical_line):
    r"""
    Compound statements (multiple statements on the same line) are
    generally discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements. Also
    avoid folding such long lines!

    Okay: if foo == 'blah':\n do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()

    E702: do_one(); do_two(); do_three()
    """
    colon = logical_line.find(':')
    # A colon that ends the line merely opens a block and is fine.
    if 0 <= colon < len(logical_line) - 1:
        head = logical_line[:colon]
        inside_dict = head.count('{') > head.count('}')     # {'a': 1}
        inside_slice = head.count('[') > head.count(']')    # [1:2]
        inside_lambda = re.search(r'\blambda\b', head) is not None
        if not (inside_dict or inside_slice or inside_lambda):
            return colon, "E701 multiple statements on one line (colon)"
    semicolon = logical_line.find(';')
    if semicolon != -1:
        return semicolon, "E702 multiple statements on one line (semicolon)"
def python_3000_has_key(logical_line):
    """
    The {}.has_key() method will be removed in the future version of
    Python. Use the 'in' operation instead, like:
    d = {"a": 1, "b": 2}
    if "b" in d:
        print d["b"]
    """
    needle = '.has_key('
    if needle in logical_line:
        # Report the column of the first occurrence.
        return logical_line.index(needle), "W601 .has_key() is deprecated, use 'in'"
def python_3000_raise_comma(logical_line):
    """
    When raising an exception, use "raise ValueError('message')"
    instead of the older form "raise ValueError, 'message'".

    The paren-using form is preferred because when the exception arguments
    are long or include string formatting, you don't need to use line
    continuation characters thanks to the containing parentheses. The older
    form will be removed in Python 3000.
    """
    # The pattern is anchored at the start of the logical line; group 1 is
    # the comma that separates the exception name from its argument.
    found = RAISE_COMMA_REGEX.match(logical_line)
    if found is None:
        return None
    return found.start(1), "W602 deprecated form of raising exception"
def python_3000_not_equal(logical_line):
    """
    != can also be written <>, but this is an obsolete usage kept for
    backwards compatibility only. New code should always use !=.
    The older syntax is removed in Python 3000.
    """
    if '<>' not in logical_line:
        return None
    # Report the column of the first '<>'.
    return logical_line.index('<>'), "W603 '<>' is deprecated, use '!='"
def python_3000_backticks(logical_line):
    """
    Backticks are removed in Python 3000.
    Use repr() instead.
    """
    column = logical_line.find('`')
    if column < 0:
        return None
    return column, "W604 backticks are deprecated, use 'repr()'"
715 ##############################################################################
716 # Helper functions
717 ##############################################################################
if '' == ''.encode():
    # Python 2: implicit encoding.
    def readlines(filename):
        """
        Return the lines of the file as a list, line endings included.

        The file is closed before returning, also when reading fails.
        """
        source = open(filename)
        try:
            return source.readlines()
        finally:
            source.close()
else:
    # Python 3: decode to latin-1.
    # This function is lazy, it does not read the encoding declaration.
    # XXX: use tokenize.detect_encoding()
    def readlines(filename):
        """
        Return the lines of the file as a list, line endings included.

        The file is decoded as latin-1 and closed before returning, also
        when reading fails.
        """
        source = open(filename, encoding='latin-1')
        try:
            return source.readlines()
        finally:
            source.close()
def expand_indent(line):
    """
    Measure the leading indentation of a line in columns.

    Each space counts as one column and each tab advances to the next
    multiple of 8.  Counting stops at the first other character.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\\t')
    8
    >>> expand_indent('    \\t')
    8
    >>> expand_indent('       \\t')
    8
    >>> expand_indent('        \\t')
    16
    """
    width = 0
    for symbol in line:
        if symbol == ' ':
            width += 1
        elif symbol == '\t':
            # Jump to the next tab stop.
            width = (width // 8 + 1) * 8
        else:
            return width
    return width
def mute_string(text):
    """
    Blank out the body of a string literal with 'x' characters.

    Prefix letters and the quotes are kept and the length is unchanged, so
    the text inside the string cannot be mistaken for syntax.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    closing = text[-1:]
    if closing == '"' or closing == "'":
        # Skip string modifiers (e.g. u or r) up to the first quote.
        body_start = text.index(closing) + 1
    else:
        body_start = 1
    body_end = len(text) - 1
    if text[-3:] in ('"""', "'''"):
        # Triple quotes: two more quote characters on each side.
        body_start += 2
        body_end -= 2
    return text[:body_start] + 'x' * (body_end - body_start) + text[body_end:]
def message(text):
    """Print a message."""
    # All output produced through this helper goes to standard output.
    # The two variants below that wrote to standard error are disabled.
    # print >> sys.stderr, options.prog + ': ' + text
    # print >> sys.stderr, text
    print(text)
791 ##############################################################################
792 # Framework to run all checks
793 ##############################################################################
def find_checks(argument_name):
    """
    Find all globally visible functions where the first argument name
    starts with argument_name.

    A function whose docstring mentions error codes is left out when every
    one of those codes is ignored; a function without codes is always kept.

    Returns a sorted list of (name, function, argument names) tuples.
    """
    # inspect.getargspec() was removed in Python 3.11; getfullargspec()
    # also returns the positional argument names as its first element.
    getargspec = getattr(inspect, 'getfullargspec', None)
    if getargspec is None:
        getargspec = inspect.getargspec
    checks = []
    for name, function in globals().items():
        if not inspect.isfunction(function):
            continue
        args = getargspec(function)[0]
        if args and args[0].startswith(argument_name):
            codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '')
            # [''] stands in for "no codes documented": always enabled.
            for code in codes or ['']:
                if not code or not ignore_code(code):
                    checks.append((name, function, args))
                    break
    checks.sort()
    return checks
class Checker(object):
    """
    Load a Python source file, tokenize it, check coding style.

    The per-run state (line_number, indent_level, tokens, ...) is set up by
    check_all(), not by the constructor.
    """

    def __init__(self, filename, lines=None):
        """
        Remember the file name and obtain the source lines.

        With filename None the name 'stdin' is used and `lines` (or an
        empty list) is taken as the source.  Otherwise the lines are read
        from the file unless `lines` is given.  The number of lines is
        added to the 'physical lines' counter.
        """
        self.filename = filename
        if filename is None:
            self.filename = 'stdin'
            self.lines = lines or []
        elif lines is None:
            self.lines = readlines(filename)
        else:
            self.lines = lines
        options.counters['physical lines'] += len(self.lines)

    def readline(self):
        """
        Get the next line from the input buffer.

        Advances line_number and returns '' once the lines are exhausted.
        """
        self.line_number += 1
        if self.line_number > len(self.lines):
            return ''
        return self.lines[self.line_number - 1]

    def readline_check_physical(self):
        """
        Check and return the next physical line. This method can be
        used to feed tokenize.generate_tokens.
        """
        line = self.readline()
        # The empty string signals the end of input and is not checked.
        if line:
            self.check_physical(line)
        return line

    def run_check(self, check, argument_names):
        """
        Run a check plugin.

        Every name in argument_names is looked up as an attribute of this
        object and passed positionally to the check function.
        """
        arguments = []
        for name in argument_names:
            arguments.append(getattr(self, name))
        return check(*arguments)

    def check_physical(self, line):
        """
        Run all physical checks on a raw input line.
        """
        self.physical_line = line
        # The first line that starts with a space or a tab decides the
        # indent character for the rest of the file.
        if self.indent_char is None and len(line) and line[0] in ' \t':
            self.indent_char = line[0]
        for name, check, argument_names in options.physical_checks:
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                self.report_error(self.line_number, offset, text, check)

    def build_tokens_line(self):
        """
        Build a logical line from tokens.

        Sets self.logical_line and self.mapping, a list of
        (offset in the logical line, token) pairs.
        """
        self.mapping = []
        logical = []
        length = 0
        previous = None
        for token in self.tokens:
            token_type, text = token[0:2]
            if token_type in SKIP_TOKENS:
                continue
            if token_type == tokenize.STRING:
                # Hide string contents from the logical checks.
                text = mute_string(text)
            if previous:
                end_line, end = previous[3]
                start_line, start = token[2]
                if end_line != start_line:  # different row
                    # Join continuation rows with one space, except right
                    # after an opening bracket or before a closing one
                    # (a preceding comma always gets the space).
                    prev_text = self.lines[end_line - 1][end - 1]
                    if prev_text == ',' or (prev_text not in '{[('
                                            and text not in '}])'):
                        logical.append(' ')
                        length += 1
                elif end != start:  # different column
                    # Copy the original text found between the two tokens.
                    fill = self.lines[end_line - 1][end:start]
                    logical.append(fill)
                    length += len(fill)
            self.mapping.append((length, token))
            logical.append(text)
            length += len(text)
            previous = token
        self.logical_line = ''.join(logical)
        assert self.logical_line.lstrip() == self.logical_line
        assert self.logical_line.rstrip() == self.logical_line

    def check_logical(self):
        """
        Build a line from tokens and run all logical checks on it.
        """
        options.counters['logical lines'] += 1
        self.build_tokens_line()
        # The indentation is whatever precedes the first mapped token on
        # its physical line.
        first_line = self.lines[self.mapping[0][1][2][0] - 1]
        indent = first_line[:self.mapping[0][1][2][1]]
        self.previous_indent_level = self.indent_level
        self.indent_level = expand_indent(indent)
        if options.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in options.logical_checks:
            if options.verbose >= 4:
                print(' ' + name)
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                if isinstance(offset, tuple):
                    # The check already returned a (row, column) position.
                    original_number, original_offset = offset
                else:
                    # Translate the logical offset back to a physical
                    # position; the last mapped token that starts at or
                    # before the offset wins.
                    for token_offset, token in self.mapping:
                        if offset >= token_offset:
                            original_number = token[2][0]
                            original_offset = (token[2][1]
                                               + offset - token_offset)
                self.report_error(original_number, original_offset,
                                  text, check)
        self.previous_logical = self.logical_line

    def check_all(self, expected=None, line_offset=0):
        """
        Run all checks on the input file.

        `expected` holds error codes that are counted but not printed;
        `line_offset` is added to reported line numbers.  Returns the
        number of errors reported for this file.
        """
        self.expected = expected or ()
        self.line_offset = line_offset
        self.line_number = 0
        self.file_errors = 0
        self.indent_char = None
        self.indent_level = 0
        self.previous_logical = ''
        self.blank_lines = 0
        self.blank_lines_before_comment = 0
        self.tokens = []
        # Bracket nesting depth; a logical line only ends at depth zero.
        parens = 0
        for token in tokenize.generate_tokens(self.readline_check_physical):
            if options.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                    (token[2][0], pos, tokenize.tok_name[token[0]], token[1]))
            self.tokens.append(token)
            token_type, text = token[0:2]
            if token_type == tokenize.OP and text in '([{':
                parens += 1
            if token_type == tokenize.OP and text in '}])':
                parens -= 1
            if token_type == tokenize.NEWLINE and not parens:
                # End of a logical line: check it and start afresh.
                self.check_logical()
                self.blank_lines = 0
                self.blank_lines_before_comment = 0
                self.tokens = []
            if token_type == tokenize.NL and not parens:
                if len(self.tokens) <= 1:
                    # The physical line contains only this token.
                    self.blank_lines += 1
                self.tokens = []
            if token_type == tokenize.COMMENT:
                source_line = token[4]
                token_start = token[2][1]
                if source_line[:token_start].strip() == '':
                    # Comment on a line of its own: carry the blank lines
                    # seen so far over to blank_lines_before_comment.
                    self.blank_lines_before_comment = max(self.blank_lines,
                        self.blank_lines_before_comment)
                    self.blank_lines = 0
                    if text.endswith('\n') and not parens:
                        # The comment also ends a physical line. This works around
                        # Python < 2.6 behaviour, which does not generate NL after
                        # a comment which is on a line by itself.
                        self.tokens = []
        return self.file_errors

    def report_error(self, line_number, offset, text, check):
        """
        Report an error, according to options.

        `text` starts with the four-character error code; `check` is the
        check function, whose docstring is printed with --show-pep8.
        """
        code = text[:4]
        if ignore_code(code):
            return
        if options.quiet == 1 and not self.file_errors:
            message(self.filename)
        if code in options.counters:
            options.counters[code] += 1
        else:
            # First occurrence: also remember the message for this code.
            options.counters[code] = 1
            options.messages[code] = text[5:]
        if options.quiet or code in self.expected:
            # Don't care about expected errors or warnings
            return
        self.file_errors += 1
        # Unless options.repeat is set, only the first occurrence of each
        # code is printed.
        if options.counters[code] == 1 or options.repeat:
            message("%s:%s:%d: %s" %
                    (self.filename, self.line_offset + line_number,
                     offset + 1, text))
            if options.show_source:
                line = self.lines[line_number - 1]
                message(line.rstrip())
                message(' ' * offset + '^')
            if options.show_pep8:
                message(check.__doc__.lstrip('\n').rstrip())
def input_file(filename):
    """
    Run all checks on a Python source file.

    Announces the file through message() when options.verbose is set.
    Returns whatever Checker(filename).check_all() returns, so that callers
    which care about the per-file result no longer lose it; existing
    callers that ignore the return value are unaffected.
    """
    if options.verbose:
        message('checking ' + filename)
    return Checker(filename).check_all()
def input_dir(dirname, runner=None):
    """
    Check all Python source files in this directory and all subdirectories.

    dirname -- directory to walk; trailing slashes are ignored.
    runner -- callable invoked with the path of every matching file;
        defaults to input_file.

    Directories and files matching options.exclude are skipped, and only
    file names accepted by filename_match() are handed to the runner.
    The 'directories' and 'files' entries of options.counters are updated.
    """
    # Keep the original name when stripping would leave nothing (e.g. '/'),
    # otherwise the root directory would silently turn into ''.
    dirname = dirname.rstrip('/') or dirname
    if excluded(dirname):
        return
    if runner is None:
        runner = input_file
    for root, dirs, files in os.walk(dirname):
        if options.verbose:
            message('directory ' + root)
        options.counters['directories'] += 1
        # Prune excluded subdirectories.  The list must be rebuilt rather
        # than edited with remove() during iteration, which skips the entry
        # following each removed one.  Slice assignment keeps the same list
        # object, which is what os.walk consults to decide where to descend.
        kept = [subdir for subdir in dirs if not excluded(subdir)]
        kept.sort()
        dirs[:] = kept
        files.sort()
        for filename in files:
            if filename_match(filename) and not excluded(filename):
                options.counters['files'] += 1
                runner(os.path.join(root, filename))
def excluded(filename):
    """
    Check if options.exclude contains a pattern that matches filename.

    Only the last path component of filename is compared against the
    patterns.  Returns True on the first match and False otherwise.
    """
    basename = os.path.basename(filename)
    for pattern in options.exclude:
        if fnmatch(basename, pattern):
            return True
    return False
def filename_match(filename):
    """
    Check if options.filename contains a pattern that matches filename.
    If options.filename is unspecified, this always returns True.

    Returns False when patterns are configured and none of them matches.
    """
    if not options.filename:
        return True
    for pattern in options.filename:
        if fnmatch(filename, pattern):
            return True
    return False
def ignore_code(code):
    """
    Check if options.ignore contains a prefix of the error code.
    If options.select contains a prefix of the error code, do not ignore it.

    The select list is consulted first, so it overrides the ignore list.
    Returns True when the code must be ignored and False otherwise.
    """
    for select in options.select:
        if code.startswith(select):
            return False
    for ignore in options.ignore:
        if code.startswith(ignore):
            return True
    return False
def reset_counters():
    """
    Forget all per-code results collected so far.

    Every entry of options.counters whose key is not listed in
    BENCHMARK_KEYS is deleted, and options.messages is replaced by a new
    empty dictionary.
    """
    # Collect the keys first: the dictionary cannot change size while it
    # is being iterated.
    stale = [name for name in options.counters if name not in BENCHMARK_KEYS]
    for name in stale:
        del options.counters[name]
    options.messages = {}
def get_error_statistics():
    """Return the statistics lines of all codes starting with "E"."""
    return get_statistics(prefix="E")
def get_warning_statistics():
    """Return the statistics lines of all codes starting with "W"."""
    return get_statistics(prefix="W")
def get_statistics(prefix=''):
    """
    Build the statistics lines for message codes starting with the prefix.

    prefix='' selects every error and warning
    prefix='E' selects every error
    prefix='W' selects every warning
    prefix='E4' selects every error that has to do with imports

    Returns a list of strings, ordered by code, each made of the count,
    the code and the message recorded for that code.
    """
    selected = [code for code in options.messages if code.startswith(prefix)]
    selected.sort()
    return ['%-7s %s %s' % (options.counters[code], code,
                            options.messages[code])
            for code in selected]
def get_count(prefix=''):
    """Sum the counters of all recorded codes starting with the prefix."""
    return sum([options.counters[code]
                for code in options.messages
                if code.startswith(prefix)])
def print_statistics(prefix=''):
    """Write one get_statistics() line per matching code to stdout."""
    report = get_statistics(prefix)
    for entry in report:
        print(entry)
def print_benchmark(elapsed):
    """
    Print benchmark numbers.

    elapsed -- duration of the run in seconds.

    Prints the elapsed time, then for every key of BENCHMARK_KEYS the
    rate per second and the total taken from options.counters.  When
    elapsed is zero (possible with a coarse clock and very little input)
    no rate can be computed, so 'n/a' is printed instead of failing with
    a ZeroDivisionError.
    """
    print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
    for key in BENCHMARK_KEYS:
        total = options.counters[key]
        if elapsed > 0:
            rate = '%d' % (total / elapsed)
        else:
            rate = 'n/a'
        print('%-7s %s per second (%d total)' % (rate, key, total))
def run_tests(filename):
    """
    Run every test case contained in a file.

    One file may hold many test cases.  A test case begins with a
    declaration: a single line starting with '#:'.  The declaration lists
    the codes of the expected failures, separated by spaces, or 'Okay'
    when no failure is expected.
    A file without any declaration is expected to pass all checks.
    After an empty declaration the following lines are not checked, until
    the next declaration.

    Examples:

     * Only E224 and W701 are expected:         #: E224 W701
     * Following example is conform:            #: Okay
     * Don't check these lines:                 #:
    """
    # The extra empty declaration makes the loop flush the last test case.
    source = readlines(filename) + ['#:\n']
    first_line = 0
    expected = ['Okay']
    pending = []
    for position, text in enumerate(source):
        if not text.startswith('#:'):
            # An ordinary line belongs to the current test case, unless
            # that test case was disabled by an empty declaration.
            if expected:
                pending.append(text)
            continue
        if expected and position > 0:
            label = '%s:%s:1' % (filename, first_line + 1)
            expected = [name for name in expected if name != 'Okay']
            failures = Checker(filename, pending).check_all(expected,
                                                            first_line)
            # Every declared code must have been counted at least once.
            for name in expected:
                if options.counters.get(name):
                    continue
                failures += 1
                message('%s: error %s not found' % (label, name))
            if options.verbose and not failures:
                message('%s: passed (%s)' % (label, ' '.join(expected)))
            # Start the next test case with clean counters so that its
            # errors are shown again.
            reset_counters()
        # Remember where the next test case starts (real line numbers),
        # which codes it declares, and empty the buffer in place.
        first_line = position
        expected = text.split()[1:]
        del pending[:]
def selftest():
    """
    Run the test cases embedded in the docstrings of all check functions.

    Each docstring line matched by SELFTEST_REGEX yields an expected code
    (or 'Okay') and a source snippet.  In the snippet the two-character
    sequences \\n, \\t and \\s stand for a line break, a tab and a space.
    Failing cases are printed; the summary is printed only when
    options.verbose is set.

    Side effect: options.quiet is set to 2 and left at that value.
    """
    passed = 0
    failed = 0
    all_checks = options.physical_checks + options.logical_checks
    for _name, check, _argument_names in all_checks:
        for docline in check.__doc__.splitlines():
            found = SELFTEST_REGEX.match(docline.lstrip())
            if found is None:
                continue
            expected, snippet = found.groups()
            checker = Checker(None)
            for piece in snippet.split(r'\n'):
                piece = piece.replace(r'\t', '\t').replace(r'\s', ' ')
                checker.lines.append(piece + '\n')
            options.quiet = 2
            checker.check_all()
            failure = None
            if expected != 'Okay':
                if not options.counters.get(expected):
                    failure = "failed to find %s" % expected
            elif len(options.counters) > len(BENCHMARK_KEYS):
                # Anything beyond the benchmark counters is an error code
                # that should not have been reported.
                unexpected = [key for key in options.counters.keys()
                              if key not in BENCHMARK_KEYS]
                failure = "incorrectly found %s" % ', '.join(unexpected)
            # Reset the counters
            reset_counters()
            if not failure:
                passed += 1
                continue
            failed += 1
            if len(checker.lines) == 1:
                print("pep8.py: %s: %s" %
                      (failure, checker.lines[0].rstrip()))
            else:
                print("pep8.py: %s:" % failure)
                for source_line in checker.lines:
                    print(source_line.rstrip())
    if options.verbose:
        print("%d passed and %d failed." % (passed, failed))
        if failed:
            print("Test failed.")
        else:
            print("Test passed.")
def process_options(arglist=None):
    """
    Parse the options given in arglist, or on the command line.

    The parsed values are stored in the module globals ``options`` and
    ``args`` and also returned as the tuple (options, args).  Besides the
    raw option values, ``options`` receives the program name, the split
    pattern/code lists, the discovered check functions, zeroed benchmark
    counters and an empty message table.
    """
    global options, args
    # (flags, keyword arguments) for every command line option, in the
    # order in which they are registered with the parser.
    option_table = [
        (('-v', '--verbose'),
         dict(default=0, action='count',
              help="print status messages, or debug with -vv")),
        (('-q', '--quiet'),
         dict(default=0, action='count',
              help="report only file names, or nothing with -qq")),
        (('-r', '--repeat'),
         dict(action='store_true',
              help="show all occurrences of the same error")),
        (('--exclude',),
         dict(metavar='patterns', default=DEFAULT_EXCLUDE,
              help="exclude files or directories which match these "
                   "comma separated patterns (default: %s)" %
                   DEFAULT_EXCLUDE)),
        (('--filename',),
         dict(metavar='patterns', default='*.py',
              help="when parsing directories, only check filenames "
                   "matching these comma separated patterns (default: "
                   "*.py)")),
        (('--select',),
         dict(metavar='errors', default='',
              help="select errors and warnings (e.g. E,W6)")),
        (('--ignore',),
         dict(metavar='errors', default='',
              help="skip errors and warnings (e.g. E4,W)")),
        (('--show-source',),
         dict(action='store_true',
              help="show source code for each error")),
        (('--show-pep8',),
         dict(action='store_true',
              help="show text of PEP 8 for each error")),
        (('--statistics',),
         dict(action='store_true',
              help="count errors and warnings")),
        (('--count',),
         dict(action='store_true',
              help="print total number of errors and warnings "
                   "to standard error and set exit code to 1 if "
                   "total is not null")),
        (('--benchmark',),
         dict(action='store_true',
              help="measure processing speed")),
        (('--testsuite',),
         dict(metavar='dir',
              help="run regression tests from dir")),
        (('--doctest',),
         dict(action='store_true',
              help="run doctest on myself")),
    ]
    parser = OptionParser(version=__version__,
                          usage="%prog [options] input ...")
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)
    options, args = parser.parse_args(arglist)
    if options.testsuite:
        # The test suite directory is processed like any other input.
        args.append(options.testsuite)
    if not (args or options.doctest):
        parser.error('input not specified')
    options.prog = os.path.basename(sys.argv[0])
    options.exclude = [pattern.rstrip('/')
                       for pattern in options.exclude.split(',')]
    if options.filename:
        options.filename = options.filename.split(',')
    selected = []
    if options.select:
        selected = options.select.split(',')
    options.select = selected
    if options.ignore:
        ignored = options.ignore.split(',')
    elif selected:
        # Ignore all checks which are not explicitly selected
        ignored = ['']
    elif options.testsuite or options.doctest:
        # For doctest and testsuite, all checks are required
        ignored = []
    else:
        # The default choice: ignore controversial checks
        ignored = DEFAULT_IGNORE.split(',')
    options.ignore = ignored
    options.physical_checks = find_checks('physical_line')
    options.logical_checks = find_checks('logical_line')
    options.counters = dict.fromkeys(BENCHMARK_KEYS, 0)
    options.messages = {}
    return options, args
def _main():
    """
    Command line entry point: parse the options, then check the inputs.

    Runs the doctests and the self test first when --doctest is given.
    Each argument is then processed with run_tests (when --testsuite is
    given) or input_file; directories are walked with input_dir.
    Statistics and benchmark figures are printed on request.  The process
    exits with status 1 when any error or warning was counted, after
    writing the total to standard error if --count is given.
    """
    options, args = process_options()
    if options.doctest:
        import doctest
        doctest.testmod(verbose=options.verbose)
        selftest()
    runner = input_file
    if options.testsuite:
        runner = run_tests
    started = time.time()
    for path in args:
        if os.path.isdir(path):
            input_dir(path, runner=runner)
            continue
        if excluded(path):
            continue
        options.counters['files'] += 1
        runner(path)
    elapsed = time.time() - started
    if options.statistics:
        print_statistics()
    if options.benchmark:
        print_benchmark(elapsed)
    total = get_count()
    if not total:
        return
    if options.count:
        sys.stderr.write(str(total) + '\n')
    sys.exit(1)
# Run the command line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    _main()